[llvm] 1f73f0c - [NFC][AMDGPU] Update cost model tests:

Mon Sep 27 09:26:27 PDT 2021

Author: Daniil Fukalov
Date: 2021-09-27T19:26:02+03:00
New Revision: 1f73f0c19d87abc0de801bdae27393bb60e7bd5a

URL: https://github.com/llvm/llvm-project/commit/1f73f0c19d87abc0de801bdae27393bb60e7bd5a
DIFF: https://github.com/llvm/llvm-project/commit/1f73f0c19d87abc0de801bdae27393bb60e7bd5a.diff

LOG: [NFC][AMDGPU] Update cost model tests:

1. Convert to generated tests.
2. Add code-size cases in a few places.

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
    llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
    llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll
    llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll
    llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll
    llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
    llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
    llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
    llvm/test/Analysis/CostModel/AMDGPU/fma.ll
    llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
    llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
    llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
    llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
    llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll
    llvm/test/Analysis/CostModel/AMDGPU/mul.ll
    llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll
    llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll
    llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
    llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
index 1328025f1c3c2..930ebc89a9e31 100644

--- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
@@ -1,20 +1,27 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST16,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOW16,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST16,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOW16,ALL %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,FAST16 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SLOW16 %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL-SIZE,FAST16-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW16-SIZE %s
 ; END.
 
-; ALL-LABEL: 'add_i32'
-; ALL: estimated cost of 1 for {{.*}} add i32
-; ALL: estimated cost of 2 for {{.*}} add <2 x i32>
-;;; Allow for 4 when v3i32 is illegal and TargetLowering thinks it needs widening,
-;;; and 3 when it is legal.
-; ALL: estimated cost of {{[34]}} for {{.*}} add <3 x i32>
-; ALL: estimated cost of 4 for {{.*}} add <4 x i32>
-;;; Allow for 8 when v3i32 is illegal and TargetLowering thinks it needs widening,
-;;; and 5 when it is legal.
-; ALL: estimated cost of {{[58]}} for {{.*}} add <5 x i32>
 define amdgpu_kernel void @add_i32() #0 {
+; ALL-LABEL: 'add_i32'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = add i32 undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = add <2 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'add_i32'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = add i32 undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = add <2 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i32 = add i32 undef, undef
   %v2i32 = add <2 x i32> undef, undef
   %v3i32 = add <3 x i32> undef, undef
@@ -23,13 +30,23 @@ define amdgpu_kernel void @add_i32() #0 {
   ret void
 }
 
-; ALL-LABEL: 'add_i64'
-; ALL: estimated cost of 2 for {{.*}} add i64
-; ALL: estimated cost of 4 for {{.*}} add <2 x i64>
-; ALL: estimated cost of 6 for {{.*}} add <3 x i64>
-; ALL: estimated cost of 8 for {{.*}} add <4 x i64>
-; ALL: estimated cost of 128 for {{.*}} add <16 x i64>
 define amdgpu_kernel void @add_i64() #0 {
+; ALL-LABEL: 'add_i64'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = add i64 undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'add_i64'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = add i64 undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i64 = add i64 undef, undef
   %v2i64 = add <2 x i64> undef, undef
   %v3i64 = add <3 x i64> undef, undef
@@ -38,23 +55,61 @@ define amdgpu_kernel void @add_i64() #0 {
   ret void
 }
 
-; ALL-LABEL: 'add_i16'
-; ALL: estimated cost of 1 for {{.*}} add i16
-; SLOW16: estimated cost of 2 for {{.*}} add <2 x i16>
-; FAST16: estimated cost of 1 for {{.*}} add <2 x i16>
 define amdgpu_kernel void @add_i16() #0 {
+; FAST16-LABEL: 'add_i16'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW16-LABEL: 'add_i16'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FAST16-SIZE-LABEL: 'add_i16'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SLOW16-SIZE-LABEL: 'add_i16'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i16 = add i16 undef, undef
   %v2i16 = add <2 x i16> undef, undef
   ret void
 }
 
-; ALL-LABEL: 'sub'
-; ALL: estimated cost of 1 for {{.*}} sub i32
-; ALL: estimated cost of 2 for {{.*}} sub i64
-; ALL: estimated cost of 1 for {{.*}} sub i16
-; SLOW16: estimated cost of 2 for {{.*}} sub <2 x i16>
-; FAST16: estimated cost of 1 for {{.*}} sub <2 x i16>
 define amdgpu_kernel void @sub() #0 {
+; FAST16-LABEL: 'sub'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW16-LABEL: 'sub'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FAST16-SIZE-LABEL: 'sub'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SLOW16-SIZE-LABEL: 'sub'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i32 = sub i32 undef, undef
   %i64 = sub i64 undef, undef
   %i16 = sub i16 undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
index 8ca13eed2f43f..8d558e4366171 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
@@ -1,68 +1,63 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL-SIZE %s
 ; END.
 
-; CHECK-LABEL: 'addrspacecast_global_to_flat'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8*
-define i8* @addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 {
-  %cast = addrspacecast i8 addrspace(1)* %ptr to i8*
-  ret i8* %cast
-}
-
-; CHECK-LABEL: 'addrspacecast_global_to_flat_v2'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
-define <2 x i8*> @addrspacecast_global_to_flat_v2(<2 x i8 addrspace(1)*> %ptr) #0 {
-  %cast = addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
-  ret <2 x i8*> %cast
-}
-
-; CHECK-LABEL: 'addrspacecast_global_to_flat_v32'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
-define <32 x i8*> @addrspacecast_global_to_flat_v32(<32 x i8 addrspace(1)*> %ptr) #0 {
-  %cast = addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
-  ret <32 x i8*> %cast
-}
-
-; CHECK-LABEL: 'addrspacecast_local_to_flat'
-; CHECK: estimated cost of 1 for {{.*}} addrspacecast i8 addrspace(3)* %ptr to i8*
-define i8* @addrspacecast_local_to_flat(i8 addrspace(3)* %ptr) #0 {
-  %cast = addrspacecast i8 addrspace(3)* %ptr to i8*
-  ret i8* %cast
-}
-
-; CHECK-LABEL: 'addrspacecast_local_to_flat_v2'
-; CHECK: estimated cost of 2 for {{.*}} addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
-define <2 x i8*> @addrspacecast_local_to_flat_v2(<2 x i8 addrspace(3)*> %ptr) #0 {
-  %cast = addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
-  ret <2 x i8*> %cast
-}
-
-; CHECK-LABEL: 'addrspacecast_local_to_flat_v32'
-; CHECK: estimated cost of 32 for {{.*}} addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
-define <32 x i8*> @addrspacecast_local_to_flat_v32(<32 x i8 addrspace(3)*> %ptr) #0 {
-  %cast = addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
-  ret <32 x i8*> %cast
-}
-
-; CHECK-LABEL: 'addrspacecast_flat_to_local'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8* %ptr to i8 addrspace(3)*
-define i8 addrspace(3)* @addrspacecast_flat_to_local(i8* %ptr) #0 {
-  %cast = addrspacecast i8* %ptr to i8 addrspace(3)*
-  ret i8 addrspace(3)* %cast
-}
-
-; CHECK-LABEL: 'addrspacecast_flat_to_local_v2'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8*> %ptr to <2 x i8 addrspace(3)*>
-define <2 x i8 addrspace(3)*> @addrspacecast_flat_to_local_v2(<2 x i8*> %ptr) #0 {
-  %cast = addrspacecast <2 x i8*> %ptr to <2 x i8 addrspace(3)*>
-  ret <2 x i8 addrspace(3)*> %cast
-}
-
-; CHECK-LABEL: 'addrspacecast_flat_to_local_v32'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8*> %ptr to <32 x i8 addrspace(3)*>
-define <32 x i8 addrspace(3)*> @addrspacecast_flat_to_local_v32(<32 x i8*> %ptr) #0 {
-  %cast = addrspacecast <32 x i8*> %ptr to <32 x i8 addrspace(3)*>
-  ret <32 x i8 addrspace(3)*> %cast
+define void @addrspacecast_global_to_flat() #0 {
+; ALL-LABEL: 'addrspacecast_global_to_flat'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8 addrspace(1)* undef to i8*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'addrspacecast_global_to_flat'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8 addrspace(1)* undef to i8*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %i8ptr = addrspacecast i8 addrspace(1)* undef to i8*
+  %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*>
+  %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*>
+  ret void
+}
+
+define void @addrspacecast_local_to_flat() #0 {
+; ALL-LABEL: 'addrspacecast_local_to_flat'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast i8 addrspace(3)* undef to i8*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'addrspacecast_local_to_flat'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast i8 addrspace(3)* undef to i8*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %i8ptr = addrspacecast i8 addrspace(3)* undef to i8*
+  %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*>
+  %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*>
+  ret void
+}
+
+define void @addrspacecast_flat_to_local() #0 {
+; ALL-LABEL: 'addrspacecast_flat_to_local'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8* undef to i8 addrspace(3)*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'addrspacecast_flat_to_local'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8* undef to i8 addrspace(3)*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %i8ptr = addrspacecast i8* undef to i8 addrspace(3)*
+  %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*>
+  %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*>
+  ret void
 }
 
 attributes #0 = { nounwind readnone }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll
index 63f7ab74e2006..042b43bc52652 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll
@@ -1,39 +1,97 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW16 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST16 %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW16-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST16-SIZE %s
 ; END.
 
-; ALL-LABEL: 'or'
-; ALL: estimated cost of 1 for {{.*}} or i32
-; ALL: estimated cost of 2 for {{.*}} or i64
-; SLOW16: estimated cost of 2 for {{.*}} or <2 x i16>
-; FAST16: estimated cost of 1 for {{.*}} or <2 x i16>
 define amdgpu_kernel void @or() #0 {
+; SLOW16-LABEL: 'or'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FAST16-LABEL: 'or'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW16-SIZE-LABEL: 'or'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FAST16-SIZE-LABEL: 'or'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i32 = or i32 undef, undef
   %i64 = or i64 undef, undef
   %v2i16 = or <2 x i16> undef, undef
   ret void
 }
 
-; ALL-LABEL: 'xor'
-; ALL: estimated cost of 1 for {{.*}} xor i32
-; ALL: estimated cost of 2 for {{.*}} xor i64
-; SLOW16: estimated cost of 2 for {{.*}} xor <2 x i16>
-; FAST16: estimated cost of 1 for {{.*}} xor <2 x i16>
 define amdgpu_kernel void @xor() #0 {
+; SLOW16-LABEL: 'xor'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FAST16-LABEL: 'xor'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW16-SIZE-LABEL: 'xor'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FAST16-SIZE-LABEL: 'xor'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i32 = xor i32 undef, undef
   %i64 = xor i64 undef, undef
   %v2i16 = xor <2 x i16> undef, undef
   ret void
 }
 
-; ALL-LABEL: 'and'
-; ALL: estimated cost of 1 for {{.*}} and i32
-; ALL: estimated cost of 2 for {{.*}} and i64
-; SLOW16: estimated cost of 2 for {{.*}} and <2 x i16>
-; FAST16: estimated cost of 1 for {{.*}} and <2 x i16>
 define amdgpu_kernel void @and() #0 {
+; SLOW16-LABEL: 'and'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FAST16-LABEL: 'and'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW16-SIZE-LABEL: 'and'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FAST16-SIZE-LABEL: 'and'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i32 = and i32 undef, undef
   %i64 = and i64 undef, undef
   %v2i16 = and <2 x i16> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll b/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll
index dbe55ff7bfc86..425b252468b70 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll
@@ -1,53 +1,63 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck  --check-prefixes=ALL,SPEED %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck --check-prefixes=ALL,SIZE %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck  --check-prefixes=ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck --check-prefixes=ALL-SIZE %s
 ; END.
 
+define amdgpu_kernel void @test_br_cost(i32 addrspace(1)* %vaddr, i32 %b) #0 {
 ; ALL-LABEL: 'test_br_cost'
-; SPEED-NEXT: estimated cost of 7 for instruction: br i1
-; SPEED: estimated cost of 4 for instruction: br label
-; SPEED-NEXT: estimated cost of 1 for instruction: %phi = phi i32 [
-; SPEED-NEXT: estimated cost of 10 for instruction: ret void
-; SIZE-NEXT: estimated cost of 5 for instruction: br i1
-; SIZE: estimated cost of 1 for instruction: br label
-; SIZE-NEXT: estimated cost of 0 for instruction: %phi = phi i32 [
-; SIZE-NEXT: estimated cost of 1 for instruction: ret void
-define amdgpu_kernel void @test_br_cost(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+; ALL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: br i1 undef, label %bb1, label %bb2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: br label %bb2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %phi = phi i32 [ %b, %bb0 ], [ undef, %bb1 ]
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'test_br_cost'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: br i1 undef, label %bb1, label %bb2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %bb2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %phi = phi i32 [ %b, %bb0 ], [ undef, %bb1 ]
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
 bb0:
   br i1 undef, label %bb1, label %bb2
 
 bb1:
-  %vec = load i32, i32 addrspace(1)* %vaddr
-  %add = add i32 %vec, %b
-  store i32 %add, i32 addrspace(1)* %out
   br label %bb2
 
 bb2:
-  %phi = phi i32 [ %b, %bb0 ], [ %add, %bb1 ]
+  %phi = phi i32 [ %b, %bb0 ], [ undef, %bb1 ]
   ret void
 }
 
-; ALL-LABEL: 'test_switch_cost'
-; SPEED-NEXT: estimated cost of 24 for instruction:   switch
-; SIZE-NEXT: estimated cost of 18 for instruction:   switch
 define amdgpu_kernel void @test_switch_cost(i32 %a) #0 {
+; ALL-LABEL: 'test_switch_cost'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: switch i32 %a, label %default [
+; ALL-NEXT:    i32 0, label %case0
+; ALL-NEXT:    i32 1, label %case1
+; ALL-NEXT:    ]
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'test_switch_cost'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: switch i32 %a, label %default [
+; ALL-SIZE-NEXT:    i32 0, label %case0
+; ALL-SIZE-NEXT:    i32 1, label %case1
+; ALL-SIZE-NEXT:    ]
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
 entry:
   switch i32 %a, label %default [
-    i32 0, label %case0
-    i32 1, label %case1
+  i32 0, label %case0
+  i32 1, label %case1
   ]
 
 case0:
-  store volatile i32 undef, i32 addrspace(1)* undef
   ret void
 
 case1:
-  store volatile i32 undef, i32 addrspace(1)* undef
   ret void
 
 default:
-  store volatile i32 undef, i32 addrspace(1)* undef
-  ret void
-
-end:
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll
index 1af8d862732c3..f907148ff6f83 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll
@@ -1,20 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s
 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s
 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX89 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN-SIZE,CI-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN-SIZE,GFX89-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN-SIZE,GFX89-SIZE %s
 ; END.
 
-; GCN-LABEL: 'extractelement_32'
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <2 x i32>
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <2 x float>
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <3 x i32>
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <4 x i32>
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <5 x i32>
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <8 x i32>
-; GCN-NEXT: estimated cost of 2 for {{.*}} extractelement <8 x i32>
 define amdgpu_kernel void @extractelement_32(i32 %arg) {
+; GCN-LABEL: 'extractelement_32'
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_1 = extractelement <3 x i32> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = extractelement <4 x i32> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = extractelement <5 x i32> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_1 = extractelement <8 x i32> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; GCN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GCN-SIZE-LABEL: 'extractelement_32'
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_1 = extractelement <3 x i32> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = extractelement <4 x i32> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = extractelement <5 x i32> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_1 = extractelement <8 x i32> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %v2i32_1 = extractelement <2 x i32> undef, i32 1
   %v2f32_1 = extractelement <2 x float> undef, i32 1
   %v3i32_1 = extractelement <3 x i32> undef, i32 1
@@ -25,12 +38,21 @@ define amdgpu_kernel void @extractelement_32(i32 %arg) {
   ret void
 }
 
-; GCN-LABEL: 'extractelement_64'
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <2 x i64>
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <3 x i64>
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <4 x i64>
-; GCN-NEXT: estimated cost of 0 for {{.*}} extractelement <8 x i64>
 define amdgpu_kernel void @extractelement_64() {
+; GCN-LABEL: 'extractelement_64'
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i64 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i64 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i64 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i64 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GCN-SIZE-LABEL: 'extractelement_64'
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i64 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i64 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i64 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i64 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %v2i64_1 = extractelement <2 x i64> undef, i64 1
   %v3i64_1 = extractelement <3 x i64> undef, i64 1
   %v4i64_1 = extractelement <4 x i64> undef, i64 1
@@ -38,19 +60,44 @@ define amdgpu_kernel void @extractelement_64() {
   ret void
 }
 
-; GCN-LABEL: 'extractelement_8'
-; GCN-NEXT: estimated cost of 1 for {{.*}} extractelement <4 x i8>
 define amdgpu_kernel void @extractelement_8() {
+; GCN-LABEL: 'extractelement_8'
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i8 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GCN-SIZE-LABEL: 'extractelement_8'
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i8 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %v4i8_1 = extractelement <4 x i8> undef, i8 1
   ret void
 }
 
-; GCN-LABEL: 'extractelement_16'
-; CI-NEXT: estimated cost of 1 for {{.*}} extractelement <2 x i16> undef, i16 0
-; GFX89-NEXT: estimated cost of 0 for {{.*}} extractelement <2 x i16>
-; GCN-NEXT: estimated cost of 1 for {{.*}} extractelement <2 x i16>
-; GCN-NEXT: estimated cost of 1 for {{.*}} extractelement <2 x i16>
 define amdgpu_kernel void @extractelement_16(i32 %arg) {
+; CI-LABEL: 'extractelement_16'
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; CI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX89-LABEL: 'extractelement_16'
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; CI-SIZE-LABEL: 'extractelement_16'
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; GFX89-SIZE-LABEL: 'extractelement_16'
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %v2i16_0 = extractelement <2 x i16> undef, i16 0
   %v2i16_1 = extractelement <2 x i16> undef, i16 1
   %v2i16_a = extractelement <2 x i16> undef, i32 %arg

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
index a616d455ce80c..7475a4d1bef50 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
@@ -1,13 +1,23 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s
 ; END.
 
-; CHECK-LABEL: 'fabs_f32'
-; CHECK: estimated cost of 0 for {{.*}} call float @llvm.fabs.f32
-; CHECK: estimated cost of 0 for {{.*}} call <2 x float> @llvm.fabs.v2f32
-; CHECK: estimated cost of 0 for {{.*}} call <3 x float> @llvm.fabs.v3f32
-; CHECK: estimated cost of 0 for {{.*}} call <5 x float> @llvm.fabs.v5f32
 define amdgpu_kernel void @fabs_f32() #0 {
+; ALL-LABEL: 'fabs_f32'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = call float @llvm.fabs.f32(float undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'fabs_f32'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = call float @llvm.fabs.f32(float undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = call float @llvm.fabs.f32(float undef) #1
   %v2f32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) #1
   %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #1
@@ -15,22 +25,38 @@ define amdgpu_kernel void @fabs_f32() #0 {
   ret void
 }
 
-; CHECK-LABEL: 'fabs_f64'
-; CHECK: estimated cost of 0 for {{.*}} call double @llvm.fabs.f64
-; CHECK: estimated cost of 0 for {{.*}} call <2 x double> @llvm.fabs.v2f64
-; CHECK: estimated cost of 0 for {{.*}} call <3 x double> @llvm.fabs.v3f64
 define amdgpu_kernel void @fabs_f64() #0 {
+; ALL-LABEL: 'fabs_f64'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = call double @llvm.fabs.f64(double undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'fabs_f64'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = call double @llvm.fabs.f64(double undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f64 = call double @llvm.fabs.f64(double undef) #1
   %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #1
   %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #1
   ret void
 }
 
-; CHECK-LABEL: 'fabs_f16'
-; CHECK: estimated cost of 0 for {{.*}} call half @llvm.fabs.f16
-; CHECK: estimated cost of 0 for {{.*}} call <2 x half> @llvm.fabs.v2f16
-; CHECK: estimated cost of 0 for {{.*}} call <3 x half> @llvm.fabs.v3f16
 define amdgpu_kernel void @fabs_f16() #0 {
+; ALL-LABEL: 'fabs_f16'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = call half @llvm.fabs.f16(half undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'fabs_f16'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = call half @llvm.fabs.f16(half undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f16 = call half @llvm.fabs.f16(half undef) #1
   %v2f16 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) #1
   %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #1

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
index b79a09c2c31f3..a733f4adba117 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
@@ -1,23 +1,41 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16,SIZEALL,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF16,SIZEALL,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX90A-FASTF64,FASTF16,PACKEDF32,ALL %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16,GFX90A-FASTF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,FASTF16,FASTF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,SLOWF64 %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16-SIZE,GFX90A-FASTF64-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,FASTF16-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,SLOWF64-SIZE %s
 ; END.
 
-; ALL-LABEL: 'fadd_f32'
-; ALL: estimated cost of 1 for {{.*}} fadd float
-; NOPACKEDF32: estimated cost of 2 for {{.*}} fadd <2 x float>
-; PACKEDF32: estimated cost of 1 for {{.*}} fadd <2 x float>
-; Allow for 4 when v3f32 is illegal and TargetLowering thinks it needs widening,
-; and 3 when it is legal.
-; NOPACKEDF32: estimated cost of {{[34]}} for {{.*}} fadd <3 x float>
-; PACKEDF32: estimated cost of 2 for {{.*}} fadd <3 x float>
-; Allow for 8 when v5f32 is illegal and TargetLowering thinks it needs widening,
-; and 5 when it is legal.
-; NOPACKEDF32: estimated cost of {{[58]}} for {{.*}} fadd <5 x float>
-; PACKEDF32: estimated cost of 3 for {{.*}} fadd <5 x float>
 define amdgpu_kernel void @fadd_f32() #0 {
+; GFX90A-FASTF64-LABEL: 'fadd_f32'
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; NOPACKEDF32-LABEL: 'fadd_f32'
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f32'
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; NOPACKEDF32-SIZE-LABEL: 'fadd_f32'
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = fadd float undef, undef
   %v2f32 = fadd <2 x float> undef, undef
   %v3f32 = fadd <3 x float> undef, undef
@@ -25,35 +43,72 @@ define amdgpu_kernel void @fadd_f32() #0 {
   ret void
 }
 
-; ALL-LABEL: 'fadd_f64'
-; GFX90A-FASTF64: estimated cost of 1 for {{.*}} fadd double
-; FASTF64: estimated cost of 2 for {{.*}} fadd double
-; SLOWF64: estimated cost of 4 for {{.*}} fadd double
-; SIZEALL: estimated cost of 2 for {{.*}} fadd double
-; GFX90A-FASTF64: estimated cost of 2 for {{.*}} fadd <2 x double>
-; FASTF64: estimated cost of 4 for {{.*}} fadd <2 x double>
-; SLOWF64: estimated cost of 8 for {{.*}} fadd <2 x double>
-; SIZEALL: estimated cost of 4 for {{.*}} fadd <2 x double>
-; GFX90A-FASTF64: estimated cost of 3 for {{.*}} fadd <3 x double>
-; FASTF64: estimated cost of 6 for {{.*}} fadd <3 x double>
-; SLOWF64: estimated cost of 12 for {{.*}} fadd <3 x double>
-; SIZEALL: estimated cost of 6 for {{.*}} fadd <3 x double>
 define amdgpu_kernel void @fadd_f64() #0 {
+; GFX90A-FASTF64-LABEL: 'fadd_f64'
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fadd double undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FASTF64-LABEL: 'fadd_f64'
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fadd double undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOWF64-LABEL: 'fadd_f64'
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = fadd double undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fadd <2 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f64'
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fadd double undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; NOPACKEDF32-SIZE-LABEL: 'fadd_f64'
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fadd double undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f64 = fadd double undef, undef
   %v2f64 = fadd <2 x double> undef, undef
   %v3f64 = fadd <3 x double> undef, undef
   ret void
 }
 
-; ALL-LABEL: 'fadd_f16'
-; ALL: estimated cost of 1 for {{.*}} fadd half
-; SLOWF16: estimated cost of 2 for {{.*}} fadd <2 x half>
-; FASTF16: estimated cost of 1 for {{.*}} fadd <2 x half>
-; SLOWF16: estimated cost of 4 for {{.*}} fadd <3 x half>
-; FASTF16: estimated cost of 2 for {{.*}} fadd <3 x half>
-; SLOWF16: estimated cost of 4 for {{.*}} fadd <4 x half>
-; FASTF16: estimated cost of 2 for {{.*}} fadd <4 x half>
 define amdgpu_kernel void @fadd_f16() #0 {
+; FASTF16-LABEL: 'fadd_f16'
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fadd half undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOWF64-LABEL: 'fadd_f16'
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fadd half undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FASTF16-SIZE-LABEL: 'fadd_f16'
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fadd half undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SLOWF64-SIZE-LABEL: 'fadd_f16'
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fadd half undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f16 = fadd half undef, undef
   %v2f16 = fadd <2 x half> undef, undef
   %v3f16 = fadd <3 x half> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
index d4836a9d69049..fc641bba8a2b0 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -1,26 +1,32 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,THRPTALL,CIFASTF64,NOFP16 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL,THRPTALL,CISLOWF64,NOFP16  %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=ALL,THRPTALL,SIFASTF64,NOFP16  %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde < %s | FileCheck -check-prefixes=ALL,THRPTALL,SISLOWF64,NOFP16  %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,THRPTALL,FP16,CISLOWF64 %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,NOFP16,CIFASTF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL,NOFP16,CISLOWF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=ALL,NOFP16,SIFASTF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde < %s | FileCheck -check-prefixes=ALL,NOFP16,SISLOWF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FP16 %s
 
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,SIZEALL,SIZECI,SIZENOF16 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL,SIZEALL,SIZECI,SIZENOF16 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=ALL,SIZEALL,SIZESI,SIZENOF16  %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde < %s | FileCheck -check-prefixes=ALL,SIZEALL,SIZESI,SIZENOF16  %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,SIZEALL,SIZECI,SIZEF16 %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL-SIZE,NOFP16-SIZE,CI-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL-SIZE,NOFP16-SIZE,CI-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=ALL-SIZE,NOFP16-SIZE,SI-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde < %s | FileCheck -check-prefixes=ALL-SIZE,NOFP16-SIZE,SI-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FP16-SIZE %s
 ; END.
 
-; ALL-LABEL: 'fdiv_f32_ieee'
-; THRPTALL: estimated cost of 14 for {{.*}} fdiv float
-; THRPTALL: estimated cost of 28 for {{.*}} fdiv <2 x float>
-; THRPTALL: estimated cost of 42 for {{.*}} fdiv <3 x float>
-; THRPTALL: estimated cost of 70 for {{.*}} fdiv <5 x float>
-; SIZEALL: estimated cost of 12 for {{.*}} fdiv float
-; SIZEALL: estimated cost of 24 for {{.*}} fdiv <2 x float>
-; SIZEALL: estimated cost of 36 for {{.*}} fdiv <3 x float>
-; SIZEALL: estimated cost of 60 for {{.*}} fdiv <5 x float>
 define amdgpu_kernel void @fdiv_f32_ieee() #0 {
+; ALL-LABEL: 'fdiv_f32_ieee'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'fdiv_f32_ieee'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = fdiv float undef, undef
   %v2f32 = fdiv <2 x float> undef, undef
   %v3f32 = fdiv <3 x float> undef, undef
@@ -28,16 +34,21 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
   ret void
 }
 
-; ALL-LABEL: 'fdiv_f32_ftzdaz'
-; THRPTALL: estimated cost of 16 for {{.*}} fdiv float
-; SIZEALL: estimated cost of 14 for {{.*}} fdiv float
-; THRPTALL: estimated cost of 32 for {{.*}} fdiv <2 x float>
-; SIZEALL: estimated cost of 28 for {{.*}} fdiv <2 x float>
-; THRPTALL: estimated cost of 48 for {{.*}} fdiv <3 x float>
-; SIZEALL: estimated cost of 42 for {{.*}} fdiv <3 x float>
-; THRPTALL: estimated cost of 80 for {{.*}} fdiv <5 x float>
-; SIZEALL: estimated cost of 70 for {{.*}} fdiv <5 x float>
 define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
+; ALL-LABEL: 'fdiv_f32_ftzdaz'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f32 = fdiv float undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2f32 = fdiv <2 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f32 = fdiv <3 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'fdiv_f32_ftzdaz'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = fdiv float undef, undef
   %v2f32 = fdiv <2 x float> undef, undef
   %v3f32 = fdiv <3 x float> undef, undef
@@ -45,92 +56,188 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
   ret void
 }
 
-; ALL-LABEL: 'fdiv_f64'
-; CIFASTF64: estimated cost of 24 for {{.*}} fdiv double
-; CISLOWF64: estimated cost of 38 for {{.*}} fdiv double
-; SIFASTF64: estimated cost of 27 for {{.*}} fdiv double
-; SISLOWF64: estimated cost of 41 for {{.*}} fdiv double
-; SIZECI: estimated cost of 22 for {{.*}} fdiv double
-; SIZESI: estimated cost of 25 for {{.*}} fdiv double
-; CIFASTF64: estimated cost of 48 for {{.*}} fdiv <2 x double>
-; CISLOWF64: estimated cost of 76 for {{.*}} fdiv <2 x double>
-; SIFASTF64: estimated cost of 54 for {{.*}} fdiv <2 x double>
-; SISLOWF64: estimated cost of 82 for {{.*}} fdiv <2 x double>
-; SIZECI: estimated cost of 44 for {{.*}} fdiv <2 x double>
-; SIZESI: estimated cost of 50 for {{.*}} fdiv <2 x double>
-; CIFASTF64: estimated cost of 72 for {{.*}} fdiv <3 x double>
-; CISLOWF64: estimated cost of 114 for {{.*}} fdiv <3 x double>
-; SIFASTF64: estimated cost of 81 for {{.*}} fdiv <3 x double>
-; SISLOWF64: estimated cost of 123 for {{.*}} fdiv <3 x double>
-; SIZECI: estimated cost of 66 for {{.*}} fdiv <3 x double>
-; SIZESI: estimated cost of 75 for {{.*}} fdiv <3 x double>
 define amdgpu_kernel void @fdiv_f64() #0 {
+; CIFASTF64-LABEL: 'fdiv_f64'
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double undef, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> undef, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; CISLOWF64-LABEL: 'fdiv_f64'
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double undef, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SIFASTF64-LABEL: 'fdiv_f64'
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double undef, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> undef, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SISLOWF64-LABEL: 'fdiv_f64'
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double undef, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> undef, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FP16-LABEL: 'fdiv_f64'
+; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; CI-SIZE-LABEL: 'fdiv_f64'
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double undef, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SI-SIZE-LABEL: 'fdiv_f64'
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double undef, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> undef, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FP16-SIZE-LABEL: 'fdiv_f64'
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f64 = fdiv double undef, undef
   %v2f64 = fdiv <2 x double> undef, undef
   %v3f64 = fdiv <3 x double> undef, undef
   ret void
 }
 
-; ALL-LABEL: 'fdiv_f16_f32ieee'
-; NOFP16: estimated cost of 14 for {{.*}} fdiv half
-; FP16: estimated cost of 12 for {{.*}} fdiv half
-; SIZENOF16: estimated cost of 12 for {{.*}} fdiv half
-; SIZEF16: estimated cost of 8 for {{.*}} fdiv half
-; NOFP16: estimated cost of 28 for {{.*}} fdiv <2 x half>
-; FP16: estimated cost of 24 for {{.*}} fdiv <2 x half>
-; SIZENOF16: estimated cost of 24 for {{.*}} fdiv <2 x half>
-; SIZEF16: estimated cost of 16 for {{.*}} fdiv <2 x half>
-; NOFP16: estimated cost of 56 for {{.*}} fdiv <4 x half>
-; FP16: estimated cost of 48 for {{.*}} fdiv <4 x half>
-; SIZENOF16: estimated cost of 48 for {{.*}} fdiv <4 x half>
-; SIZEF16: estimated cost of 32 for {{.*}} fdiv <4 x half>
 define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
+; NOFP16-LABEL: 'fdiv_f16_f32ieee'
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FP16-LABEL: 'fdiv_f16_f32ieee'
+; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f16 = fdiv half undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f16 = fdiv half undef, undef
   %v2f16 = fdiv <2 x half> undef, undef
   %v4f16 = fdiv <4 x half> undef, undef
   ret void
 }
 
-; ALL-LABEL: 'fdiv_f16_f32ftzdaz'
-; NOFP16: estimated cost of 16 for {{.*}} fdiv half
-; FP16: estimated cost of 12 for {{.*}} fdiv half
-; SIZENOF16: estimated cost of 14 for {{.*}} fdiv half
-; SIZEF16: estimated cost of 8 for {{.*}} fdiv half
-; NOFP16: estimated cost of 32 for {{.*}} fdiv <2 x half>
-; FP16: estimated cost of 24 for {{.*}} fdiv <2 x half>
-; SIZENOF16: estimated cost of 28 for {{.*}} fdiv <2 x half>
-; SIZEF16: estimated cost of 16 for {{.*}} fdiv <2 x half>
-; NOFP16: estimated cost of 64 for {{.*}} fdiv <4 x half>
-; FP16: estimated cost of 48 for {{.*}} fdiv <4 x half>
-; SIZENOF16: estimated cost of 56 for {{.*}} fdiv <4 x half>
-; SIZEF16: estimated cost of 32 for {{.*}} fdiv <4 x half>
 define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
+; NOFP16-LABEL: 'fdiv_f16_f32ftzdaz'
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f16 = fdiv half undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FP16-LABEL: 'fdiv_f16_f32ftzdaz'
+; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f16 = fdiv half undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f16 = fdiv half undef, undef
   %v2f16 = fdiv <2 x half> undef, undef
   %v4f16 = fdiv <4 x half> undef, undef
   ret void
 }
 
-; ALL-LABEL: 'rcp_ieee'
-; THRPTALL: estimated cost of 14 for {{.*}} fdiv float
-; SIZEALL: estimated cost of 12 for {{.*}} fdiv float
-; NOFP16: estimated cost of 14 for {{.*}} fdiv half
-; FP16: estimated cost of 4 for {{.*}} fdiv half
-; SIZENOF16: estimated cost of 12 for {{.*}} fdiv half
-; SIZEF16: estimated cost of 2 for {{.*}} fdiv half
-; CIFASTF64: estimated cost of 24 for {{.*}} fdiv double
-; CISLOWF64: estimated cost of 38 for {{.*}} fdiv double
-; SIFASTF64: estimated cost of 27 for {{.*}} fdiv double
-; SISLOWF64: estimated cost of 41 for {{.*}} fdiv double
-; SIZECI: estimated cost of 22 for {{.*}} fdiv double
-; SIZESI: estimated cost of 25 for {{.*}} fdiv double
-; THRPTALL: estimated cost of 28 for {{.*}} fdiv <2 x float>
-; SIZEALL: estimated cost of 24 for {{.*}} fdiv <2 x float>
-; NOFP16: estimated cost of 28 for {{.*}} fdiv <2 x half>
-; FP16: estimated cost of 8 for {{.*}} fdiv <2 x half>
-; SIZENOF16: estimated cost of 24 for {{.*}} fdiv <2 x half>
-; SIZEF16: estimated cost of 4 for {{.*}} fdiv <2 x half>
 define amdgpu_kernel void @rcp_ieee() #0 {
+; CIFASTF64-LABEL: 'rcp_ieee'
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; CISLOWF64-LABEL: 'rcp_ieee'
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SIFASTF64-LABEL: 'rcp_ieee'
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SISLOWF64-LABEL: 'rcp_ieee'
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FP16-LABEL: 'rcp_ieee'
+; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; CI-SIZE-LABEL: 'rcp_ieee'
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SI-SIZE-LABEL: 'rcp_ieee'
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FP16-SIZE-LABEL: 'rcp_ieee'
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = fdiv float 1.0, undef
   %f16 = fdiv half 1.0, undef
   %f64 = fdiv double 1.0, undef
@@ -139,16 +246,21 @@ define amdgpu_kernel void @rcp_ieee() #0 {
   ret void
 }
 
-; ALL-LABEL: 'rcp_ftzdaz'
-; THRPTALL: estimated cost of 4 for {{.*}} fdiv float
-; SIZEALL: estimated cost of 2 for {{.*}} fdiv float
-; THRPTALL: estimated cost of 4 for {{.*}} fdiv half
-; SIZEALL: estimated cost of 2 for {{.*}} fdiv half
-; THRPTALL: estimated cost of 8 for {{.*}} fdiv <2 x float>
-; SIZEALL: estimated cost of 4 for {{.*}} fdiv <2 x float>
-; THRPTALL: estimated cost of 8 for {{.*}} fdiv <2 x half>
-; SIZEALL: estimated cost of 4 for {{.*}} fdiv <2 x half>
 define amdgpu_kernel void @rcp_ftzdaz() #1 {
+; ALL-LABEL: 'rcp_ftzdaz'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'rcp_ftzdaz'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = fdiv float 1.0, undef
   %f16 = fdiv half 1.0, undef
   %v2f32 = fdiv <2 x float> <float 1.0, float 1.0>, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
index 1758663ffeff5..d9e4e453abf05 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
@@ -1,24 +1,48 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF32,FASTF16,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF32,SLOWF16,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIZEALL,SIZEF16 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIZEALL,SIZENOF16 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX90A-FASTF64,FASTF16,PACKEDF32,ALL %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9,GFX90A %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOW %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9-SIZE,GFX90A-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9-SIZE,SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SIZE,SLOW-SIZE %s
 ; END.
 
-; ALL-LABEL: 'fma_f32'
-; SLOWF32: estimated cost of 4 for {{.*}} call float @llvm.fma.f32
-; FASTF32: estimated cost of 2 for {{.*}} call float @llvm.fma.f32
-; SIZEALL: estimated cost of 2 for {{.*}} call float @llvm.fma.f32
-; SLOWF32: estimated cost of 8 for {{.*}} call <2 x float> @llvm.fma.v2f32
-; PACKEDF32: estimated cost of 2 for {{.*}} call <2 x float> @llvm.fma.v2f32
-; SIZEALL: estimated cost of 4 for {{.*}} call <2 x float> @llvm.fma.v2f32
-; SLOWF32: estimated cost of 12 for {{.*}} call <3 x float> @llvm.fma.v3f32
-; PACKEDF32: estimated cost of 4 for {{.*}} call <3 x float> @llvm.fma.v3f32
-; SIZEALL: estimated cost of 6 for {{.*}} call <3 x float> @llvm.fma.v3f32
-; SLOWF32: estimated cost of 20 for {{.*}} call <5 x float> @llvm.fma.v5f32
-; PACKEDF32: estimated cost of 6 for {{.*}} call <5 x float> @llvm.fma.v5f32
-; SIZEALL: estimated cost of 10 for {{.*}} call <5 x float> @llvm.fma.v5f32
 define amdgpu_kernel void @fma_f32() #0 {
+; GFX90A-LABEL: 'fma_f32'
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX900-LABEL: 'fma_f32'
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW-LABEL: 'fma_f32'
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX90A-SIZE-LABEL: 'fma_f32'
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SIZE-LABEL: 'fma_f32'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #1
   %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #1
   %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #1
@@ -26,38 +50,68 @@ define amdgpu_kernel void @fma_f32() #0 {
   ret void
 }
 
-; ALL-LABEL: 'fma_f64'
-; SLOWF64: estimated cost of 4 for {{.*}} call double @llvm.fma.f64
-; GFX90A-FASTF64: estimated cost of 1 for {{.*}} call double @llvm.fma.f64
-; FASTF64: estimated cost of 2 for {{.*}} call double @llvm.fma.f64
-; SIZEALL: estimated cost of 2 for {{.*}} call double @llvm.fma.f64
-; SLOWF64: estimated cost of 8 for {{.*}} call <2 x double> @llvm.fma.v2f64
-; GFX90A-FASTF64: estimated cost of 2 for {{.*}} call <2 x double> @llvm.fma.v2f64
-; FASTF64: estimated cost of 4 for {{.*}} call <2 x double> @llvm.fma.v2f64
-; SIZEALL: estimated cost of 4 for {{.*}} call <2 x double> @llvm.fma.v2f64
-; SLOWF64: estimated cost of 12 for {{.*}} call <3 x double> @llvm.fma.v3f64
-; FASTF64: estimated cost of 6 for {{.*}} call <3 x double> @llvm.fma.v3f64
-; SIZEALL: estimated cost of 6 for {{.*}} call <3 x double> @llvm.fma.v3f64
 define amdgpu_kernel void @fma_f64() #0 {
+; GFX90A-LABEL: 'fma_f64'
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX900-LABEL: 'fma_f64'
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW-LABEL: 'fma_f64'
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX90A-SIZE-LABEL: 'fma_f64'
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SIZE-LABEL: 'fma_f64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #1
   %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #1
   %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #1
   ret void
 }
 
-; ALL-LABEL: 'fma_f16'
-; SLOWF16: estimated cost of 4 for {{.*}} call half @llvm.fma.f16
-; FASTF16: estimated cost of 2 for {{.*}} call half @llvm.fma.f16
-; SIZEALL: estimated cost of 2 for {{.*}} call half @llvm.fma.f16
-; SLOWF16: estimated cost of 8 for {{.*}} call <2 x half> @llvm.fma.v2f16
-; FASTF16: estimated cost of 2 for {{.*}} call <2 x half> @llvm.fma.v2f16
-; SIZEF16: estimated cost of 2 for {{.*}} call <2 x half> @llvm.fma.v2f16
-; SIZENOF16: estimated cost of 4 for {{.*}} call <2 x half> @llvm.fma.v2f16
-; SLOWF16: estimated cost of 16 for {{.*}} call <3 x half> @llvm.fma.v3f16
-; FASTF16: estimated cost of 4 for {{.*}} call <3 x half> @llvm.fma.v3f16
-; SIZEF16: estimated cost of 4 for {{.*}} call <3 x half> @llvm.fma.v3f16
-; SIZENOF16: estimated cost of 8 for {{.*}} call <3 x half> @llvm.fma.v3f16
 define amdgpu_kernel void @fma_f16() #0 {
+; GFX9-LABEL: 'fma_f16'
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW-LABEL: 'fma_f16'
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX9-SIZE-LABEL: 'fma_f16'
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SLOW-SIZE-LABEL: 'fma_f16'
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #1
   %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #1
   %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #1

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
index 75c5e76be7411..82e8f0fb90a78 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
@@ -1,23 +1,41 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIZEALL,FASTF16 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIZEALL,SLOWF16 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX90A-FASTF64,FASTF16,PACKEDF32,ALL %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9,GFX90A-FASTF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9,F32,FASTF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=F32,SLOW %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9-SIZE,GFX90A-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=SIZE,GFX9-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SIZE,SLOW-SIZE %s
 ; END.
 
-; ALL-LABEL: 'fmul_f32'
-; ALL: estimated cost of 1 for {{.*}} fmul float
-; NOPACKEDF32: estimated cost of 2 for {{.*}} fmul <2 x float>
-; PACKEDF32: estimated cost of 1 for {{.*}} fmul <2 x float>
-;;; Allow for 4 when v3f32 is illegal and TargetLowering thinks it needs widening,
-;;; and 3 when it is legal.
-; NOPACKEDF32: estimated cost of {{[34]}} for {{.*}} fmul <3 x float>
-; PACKEDF32: estimated cost of 2 for {{.*}} fmul <3 x float>
-;;; Allow for 8 when v5f32 is illegal and TargetLowering thinks it needs widening,
-;;; and 5 when it is legal.
-; NOPACKEDF32: estimated cost of {{[58]}} for {{.*}} fmul <5 x float>
-; PACKEDF32: estimated cost of 3 for {{.*}} fmul <5 x float>
 define amdgpu_kernel void @fmul_f32() #0 {
+; GFX90A-FASTF64-LABEL: 'fmul_f32'
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; F32-LABEL: 'fmul_f32'
+; F32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
+; F32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
+; F32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
+; F32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; F32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX90A-SIZE-LABEL: 'fmul_f32'
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SIZE-LABEL: 'fmul_f32'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = fmul float undef, undef
   %v2f32 = fmul <2 x float> undef, undef
   %v3f32 = fmul <3 x float> undef, undef
@@ -25,33 +43,72 @@ define amdgpu_kernel void @fmul_f32() #0 {
   ret void
 }
 
-; ALL-LABEL: 'fmul_f64'
-; GFX90A-FASTF64: estimated cost of 1 for {{.*}} fmul double
-; FASTF64: estimated cost of 2 for {{.*}} fmul double
-; SLOWF64: estimated cost of 4 for {{.*}} fmul double
-; SIZEALL: estimated cost of 2 for {{.*}} fmul double
-; FASTF64: estimated cost of 4 for {{.*}} fmul <2 x double>
-; SLOWF64: estimated cost of 8 for {{.*}} fmul <2 x double>
-; SIZEALL: estimated cost of 4 for {{.*}} fmul <2 x double>
-; FASTF64: estimated cost of 6 for {{.*}} fmul <3 x double>
-; SLOWF64: estimated cost of 12 for {{.*}} fmul <3 x double>
-; SIZEALL: estimated cost of 6 for {{.*}} fmul <3 x double>
 define amdgpu_kernel void @fmul_f64() #0 {
+; GFX90A-FASTF64-LABEL: 'fmul_f64'
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fmul double undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FASTF64-LABEL: 'fmul_f64'
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fmul double undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW-LABEL: 'fmul_f64'
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = fmul double undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fmul <2 x double> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX90A-SIZE-LABEL: 'fmul_f64'
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fmul double undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SIZE-LABEL: 'fmul_f64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fmul double undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f64 = fmul double undef, undef
   %v2f64 = fmul <2 x double> undef, undef
   %v3f64 = fmul <3 x double> undef, undef
   ret void
 }
 
-; ALL-LABEL: 'fmul_f16'
-; ALL: estimated cost of 1 for {{.*}} fmul half
-; SLOWF16: estimated cost of 2 for {{.*}} fmul <2 x half>
-; FASTF16: estimated cost of 1 for {{.*}} fmul <2 x half>
-; SLOWF16: estimated cost of 4 for {{.*}} fmul <3 x half>
-; FASTF16: estimated cost of 2 for {{.*}} fmul <3 x half>
-; SLOWF16: estimated cost of 4 for {{.*}} fmul <4 x half>
-; FASTF16: estimated cost of 2 for {{.*}} fmul <4 x half>
 define amdgpu_kernel void @fmul_f16() #0 {
+; GFX9-LABEL: 'fmul_f16'
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW-LABEL: 'fmul_f16'
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX9-SIZE-LABEL: 'fmul_f16'
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SLOW-SIZE-LABEL: 'fmul_f16'
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f16 = fmul half undef, undef
   %v2f16 = fmul <2 x half> undef, undef
   %v3f16 = fmul <3 x half> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
index 0038f5b9fa3bd..970c8d9d2886a 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
@@ -1,13 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SIZE %s
 ; END.
 
-; CHECK-LABEL: 'fneg_f32'
-; CHECK: estimated cost of 0 for {{.*}} fneg float
-; CHECK: estimated cost of 0 for {{.*}} fneg <2 x float>
-; CHECK: estimated cost of 0 for {{.*}} fneg <3 x float>
-; CHECK: estimated cost of 0 for {{.*}} fneg <5 x float>
 define amdgpu_kernel void @fneg_f32() {
+; CHECK-LABEL: 'fneg_f32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = fneg float undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = fneg <2 x float> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SIZE-LABEL: 'fneg_f32'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = fneg float undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = fneg <2 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = fneg float undef
   %v2f32 = fneg <2 x float> undef
   %v3f32 = fneg <3 x float> undef
@@ -15,22 +25,38 @@ define amdgpu_kernel void @fneg_f32() {
   ret void
 }
 
-; CHECK-LABEL: 'fneg_f64'
-; CHECK: estimated cost of 0 for {{.*}} fneg double
-; CHECK: estimated cost of 0 for {{.*}} fneg <2 x double>
-; CHECK: estimated cost of 0 for {{.*}} fneg <3 x double>
 define amdgpu_kernel void @fneg_f64() {
+; CHECK-LABEL: 'fneg_f64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = fneg double undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = fneg <2 x double> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = fneg <3 x double> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SIZE-LABEL: 'fneg_f64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = fneg double undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = fneg <2 x double> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = fneg <3 x double> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f64 = fneg double undef
   %v2f64 = fneg <2 x double> undef
   %v3f64 = fneg <3 x double> undef
   ret void
 }
 
-; CHECK-LABEL: 'fneg_f16'
-; CHECK: estimated cost of 0 for {{.*}} fneg half
-; CHECK: estimated cost of 0 for {{.*}} fneg <2 x half>
-; CHECK: estimated cost of 0 for {{.*}} fneg <3 x half>
 define amdgpu_kernel void @fneg_f16() {
+; CHECK-LABEL: 'fneg_f16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = fneg half undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SIZE-LABEL: 'fneg_f16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = fneg half undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f16 = fneg half undef
   %v2f16 = fneg <2 x half> undef
   %v3f16 = fneg <3 x half> undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
index 27d5a000ef5f8..28ef4e587a9c4 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
@@ -1,15 +1,25 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s
-; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=SIZEALL,FASTF16,ALL %s
-; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SIZEALL,SLOWF16,ALL %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,FASTF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SLOWF64 %s
+; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL-SIZE,FASTF64-SIZE %s
+; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW64-SIZE %s
 ; END.
 
-; ALL-LABEL: 'fsub_f32'
-; ALL: estimated cost of 1 for {{.*}} fsub float
-; ALL: estimated cost of 2 for {{.*}} fsub <2 x float>
-; ALL: estimated cost of 3 for {{.*}} fsub <3 x float>
-; ALL: estimated cost of 5 for {{.*}} fsub <5 x float>
 define amdgpu_kernel void @fsub_f32() #0 {
+; ALL-LABEL: 'fsub_f32'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'fsub_f32'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = fsub float undef, undef
   %v2f32 = fsub <2 x float> undef, undef
   %v3f32 = fsub <3 x float> undef, undef
@@ -17,32 +27,60 @@ define amdgpu_kernel void @fsub_f32() #0 {
   ret void
 }
 
-; ALL-LABEL: 'fsub_f64'
-; FASTF64: estimated cost of 2 for {{.*}} fsub double
-; SLOWF64: estimated cost of 4 for {{.*}} fsub double
-; SIZEALL: estimated cost of 2 for {{.*}} fsub double
-; FASTF64: estimated cost of 4 for {{.*}} fsub <2 x double>
-; SLOWF64: estimated cost of 8 for {{.*}} fsub <2 x double>
-; SIZEALL: estimated cost of 4 for {{.*}} fsub <2 x double>
-; FASTF64: estimated cost of 6 for {{.*}} fsub <3 x double>
-; SLOWF64: estimated cost of 12 for {{.*}} fsub <3 x double>
-; SIZEALL: estimated cost of 6 for {{.*}} fsub <3 x double>
 define amdgpu_kernel void @fsub_f64() #0 {
+; FASTF64-LABEL: 'fsub_f64'
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOWF64-LABEL: 'fsub_f64'
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = fsub double undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fsub <2 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fsub <3 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'fsub_f64'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f64 = fsub double undef, undef
   %v2f64 = fsub <2 x double> undef, undef
   %v3f64 = fsub <3 x double> undef, undef
   ret void
 }
 
-; ALL-LABEL: 'fsub_f16'
-; ALL: estimated cost of 1 for {{.*}} fsub half
-; SLOWF16: estimated cost of 2 for {{.*}} fsub <2 x half>
-; FASTF16: estimated cost of 1 for {{.*}} fsub <2 x half>
-; SLOWF16: estimated cost of 4 for {{.*}} fsub <3 x half>
-; FASTF16: estimated cost of 2 for {{.*}} fsub <3 x half>
-; SLOWF16: estimated cost of 4 for {{.*}} fsub <4 x half>
-; FASTF16: estimated cost of 2 for {{.*}} fsub <4 x half>
 define amdgpu_kernel void @fsub_f16() #0 {
+; FASTF64-LABEL: 'fsub_f16'
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOWF64-LABEL: 'fsub_f16'
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FASTF64-SIZE-LABEL: 'fsub_f16'
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SLOW64-SIZE-LABEL: 'fsub_f16'
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f16 = fsub half undef, undef
   %v2f16 = fsub <2 x half> undef, undef
   %v3f16 = fsub <3 x half> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
index 52b745bbad3b3..50fccb9a2f2a3 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
@@ -1,35 +1,70 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=FUSED,NOCONTRACT,THRPTALL,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=on < %s | FileCheck -check-prefixes=SLOW,NOCONTRACT,THRPTALL,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=fast < %s | FileCheck -check-prefixes=FUSED,CONTRACT,THRPTALL,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=GFX1030,NOCONTRACT,THRPTALL,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=FUSED,SZNOCONTRACT,SIZEALL,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=on < %s | FileCheck -check-prefixes=SLOW,SZNOCONTRACT,SIZEALL,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=fast < %s | FileCheck -check-prefixes=FUSED,CONTRACT,SIZEALL,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=GFX1030,SZNOCONTRACT,SIZEALL,ALL %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=SLOWF64,FUSED,SLOWF32 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=on < %s | FileCheck -check-prefixes=SLOWF64,FASTF32,GFX9SLOW %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=fast < %s | FileCheck -check-prefixes=FUSED,SLOWF32,GFX9FAST %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=SLOWF64,FUSED,FASTF32 %s
+
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=SLOWF64-SIZE,FUSED-SIZE,SLOWF32-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=on < %s | FileCheck -check-prefixes=SLOWF64-SIZE,FASTF32-SIZE,GFX9SLOW-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee -denormal-fp-math=ieee -fp-contract=fast < %s | FileCheck -check-prefixes=FUSED-SIZE,SLOWF32-SIZE,GFX9FAST-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -denormal-fp-math=preserve-sign -fp-contract=on < %s | FileCheck -check-prefixes=SLOWF64-SIZE,FUSED-SIZE,FASTF32-SIZE %s
 ; END.
 
 target triple = "amdgcn--"
 
-; ALL-LABEL: 'fmul_fadd_f32':
-; FUSED: estimated cost of 0 for {{.*}} fmul float
-; SLOW: estimated cost of 1 for {{.*}} fmul float
-; GFX1030: estimated cost of 1 for {{.*}} fmul float
-; ALL: estimated cost of 1 for {{.*}} fadd float
-; ALL: estimated cost of 0 for {{.*}} fmul contract float
-; ALL: estimated cost of 1 for {{.*}} fadd contract float
-; FUSED: estimated cost of 0 for {{.*}} fmul <2 x float>
-; SLOW: estimated cost of 2 for {{.*}} fmul <2 x float>
-; GFX1030: estimated cost of 2 for {{.*}} fmul <2 x float>
-; ALL: estimated cost of 2 for {{.*}} fadd <2 x float>
-; FUSED: estimated cost of 0 for {{.*}} fmul float
-; SLOW: estimated cost of 1 for {{.*}} fmul float
-; GFX1030: estimated cost of 1 for {{.*}} fmul float
-; ALL: estimated cost of 1 for {{.*}} fsub float
-; FUSED: estimated cost of 0 for {{.*}} fmul <2 x float>
-; SLOW: estimated cost of 2 for {{.*}} fmul <2 x float>
-; GFX1030: estimated cost of 2 for {{.*}} fmul <2 x float>
-; ALL: estimated cost of 2 for {{.*}} fsub <2 x float>
 define void @fmul_fadd_f32() #0 {
+; SLOWF32-LABEL: 'fmul_fadd_f32'
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = fmul float undef, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32add = fadd float %f32, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32c = fmul contract float undef, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32cadd = fadd contract float %f32c, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = fmul <2 x float> undef, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32_2 = fmul float undef, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = fmul <2 x float> undef, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FASTF32-LABEL: 'fmul_fadd_f32'
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32add = fadd float %f32, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32c = fmul contract float undef, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32cadd = fadd contract float %f32c, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32_2 = fmul float undef, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_2 = fmul <2 x float> undef, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOWF32-SIZE-LABEL: 'fmul_fadd_f32'
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = fmul float undef, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32add = fadd float %f32, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32c = fmul contract float undef, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32cadd = fadd contract float %f32c, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = fmul <2 x float> undef, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32_2 = fmul float undef, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = fmul <2 x float> undef, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FASTF32-SIZE-LABEL: 'fmul_fadd_f32'
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32add = fadd float %f32, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32c = fmul contract float undef, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32cadd = fadd contract float %f32c, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32_2 = fmul float undef, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_2 = fmul <2 x float> undef, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f32 = fmul float undef, undef
   %f32add = fadd float %f32, undef
   %f32c = fmul contract float undef, undef
@@ -43,22 +78,59 @@ define void @fmul_fadd_f32() #0 {
   ret void
 }
 
-; ALL-LABEL: 'fmul_fadd_f16':
-; FUSED: estimated cost of 0 for {{.*}} fmul half
-; SLOW: estimated cost of 1 for {{.*}} fmul half
-; ALL: estimated cost of 1 for {{.*}} fadd half
-; ALL: estimated cost of 0 for {{.*}} fmul contract half
-; ALL: estimated cost of 1 for {{.*}} fadd contract half
-; FUSED: estimated cost of 0 for {{.*}} fmul <2 x half>
-; SLOW: estimated cost of 1 for {{.*}} fmul <2 x half>
-; ALL: estimated cost of 1 for {{.*}} fadd <2 x half>
-; FUSED: estimated cost of 0 for {{.*}} fmul half
-; SLOW: estimated cost of 1 for {{.*}} fmul half
-; ALL: estimated cost of 1 for {{.*}} fsub half
-; FUSED: estimated cost of 0 for {{.*}} fmul <2 x half>
-; SLOW: estimated cost of 1 for {{.*}} fmul <2 x half>
-; ALL: estimated cost of 1 for {{.*}} fsub <2 x half>
 define void @fmul_fadd_f16() #0 {
+; FUSED-LABEL: 'fmul_fadd_f16'
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = fmul half undef, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16add = fadd half %f16, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16c = fmul contract half undef, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f15cadd = fadd contract half %f16c, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fmul <2 x half> undef, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16_2 = fmul half undef, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16_2 = fmul <2 x half> undef, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX9SLOW-LABEL: 'fmul_fadd_f16'
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16add = fadd half %f16, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16c = fmul contract half undef, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f15cadd = fadd contract half %f16c, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16_2 = fmul half undef, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_2 = fmul <2 x half> undef, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FUSED-SIZE-LABEL: 'fmul_fadd_f16'
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = fmul half undef, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16add = fadd half %f16, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16c = fmul contract half undef, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f15cadd = fadd contract half %f16c, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fmul <2 x half> undef, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16_2 = fmul half undef, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16_2 = fmul <2 x half> undef, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; GFX9SLOW-SIZE-LABEL: 'fmul_fadd_f16'
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16add = fadd half %f16, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16c = fmul contract half undef, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f15cadd = fadd contract half %f16c, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16_2 = fmul half undef, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_2 = fmul <2 x half> undef, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f16 = fmul half undef, undef
   %f16add = fadd half %f16, undef
   %f16c = fmul contract half undef, undef
@@ -72,31 +144,59 @@ define void @fmul_fadd_f16() #0 {
   ret void
 }
 
-; ALL-LABEL: 'fmul_fadd_f64':
-; CONTRACT: estimated cost of 0 for {{.*}} fmul double
-; NOCONTRACT: estimated cost of 4 for {{.*}} fmul double
-; SZNOCONTRACT: estimated cost of 2 for {{.*}} fmul double
-; THRPTALL: estimated cost of 4 for {{.*}} fadd double
-; SIZEALL: estimated cost of 2 for {{.*}} fadd double
-; ALL: estimated cost of 0 for {{.*}} fmul contract double
-; THRPTALL: estimated cost of 4 for {{.*}} fadd contract double
-; SIZEALL: estimated cost of 2 for {{.*}} fadd contract double
-; CONTRACT: estimated cost of 0 for {{.*}} fmul <2 x double>
-; NOCONTRACT: estimated cost of 8 for {{.*}} fmul <2 x double>
-; SZNOCONTRACT: estimated cost of 4 for {{.*}} fmul <2 x double>
-; THRPTALL: estimated cost of 8 for {{.*}} fadd <2 x double>
-; SIZEALL: estimated cost of 4 for {{.*}} fadd <2 x double>
-; CONTRACT: estimated cost of 0 for {{.*}} fmul double
-; NOCONTRACT: estimated cost of 4 for {{.*}} fmul double
-; SZNOCONTRACT: estimated cost of 2 for {{.*}} fmul double
-; THRPTALL: estimated cost of 4 for {{.*}} fsub double
-; SIZEALL: estimated cost of 2 for {{.*}} fsub double
-; CONTRACT: estimated cost of 0 for {{.*}} fmul <2 x double>
-; NOCONTRACT: estimated cost of 8 for {{.*}} fmul <2 x double>
-; SZNOCONTRACT: estimated cost of 4 for {{.*}} fmul <2 x double>
-; THRPTALL: estimated cost of 8 for {{.*}} fsub <2 x double>
-; SIZEALL: estimated cost of 4 for {{.*}} fsub <2 x double>
 define void @fmul_fadd_f64() #0 {
+; SLOWF64-LABEL: 'fmul_fadd_f64'
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = fmul double undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64add = fadd double %f64, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64c = fmul contract double undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64cadd = fadd contract double %f64c, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fmul <2 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64_2 = fmul double undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64sub = fsub double %f64_2, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64_2 = fmul <2 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX9FAST-LABEL: 'fmul_fadd_f64'
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = fmul double undef, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64add = fadd double %f64, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64c = fmul contract double undef, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64cadd = fadd contract double %f64c, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = fmul <2 x double> undef, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64_2 = fmul double undef, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64sub = fsub double %f64_2, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = fmul <2 x double> undef, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOWF64-SIZE-LABEL: 'fmul_fadd_f64'
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fmul double undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64add = fadd double %f64, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64c = fmul contract double undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64cadd = fadd contract double %f64c, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64_2 = fmul double undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64sub = fsub double %f64_2, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64_2 = fmul <2 x double> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; GFX9FAST-SIZE-LABEL: 'fmul_fadd_f64'
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = fmul double undef, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64add = fadd double %f64, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64c = fmul contract double undef, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64cadd = fadd contract double %f64c, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = fmul <2 x double> undef, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64_2 = fmul double undef, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64sub = fsub double %f64_2, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = fmul <2 x double> undef, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %f64 = fmul double undef, undef
   %f64add = fadd double %f64, undef
   %f64c = fmul contract double undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll
index a7d28413319bf..e0f64201b52a5 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll
@@ -1,19 +1,45 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX89 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=CI %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GFX89 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX89 %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=CI-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GFX89-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX89-SIZE %s
 ; END.
 
-; GCN-LABEL: 'insertelement_v2'
-; GCN: estimated cost of 0 for {{.*}} insertelement <2 x i32>
-; GCN: estimated cost of 0 for {{.*}} insertelement <2 x i64>
-; CI: estimated cost of 1 for {{.*}} insertelement <2 x i16>
-; GFX89: estimated cost of 0 for {{.*}} insertelement <2 x i16>
-; GCN: estimated cost of 1 for {{.*}} insertelement <2 x i16>
-; GCN: estimated cost of 1 for {{.*}} insertelement <2 x i8>
 define amdgpu_kernel void @insertelement_v2() {
+; CI-LABEL: 'insertelement_v2'
+; CI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; GFX89-LABEL: 'insertelement_v2'
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; CI-SIZE-LABEL: 'insertelement_v2'
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; GFX89-SIZE-LABEL: 'insertelement_v2'
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
   %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
   %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
index 33109ff18a2c6..6d702aed3cf43 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
@@ -1,21 +1,27 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW16,THRPTALL,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST16,THRPTALL,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SIZESLOW16,SIZEALL,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=SIZEFAST16,SIZEALL,ALL %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW16-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST16-SIZE %s
 ; END.
 
-; ALL-LABEL: 'mul_i32'
-; THRPTALL: estimated cost of 4 for {{.*}} mul i32
-; SIZEALL: estimated cost of 2 for {{.*}} mul i32
-; THRPTALL: estimated cost of 8 for {{.*}} mul <2 x i32>
-; SIZEALL: estimated cost of 4 for {{.*}} mul <2 x i32>
-; THRPTALL: estimated cost of 12 for {{.*}} mul <3 x i32>
-; SIZEALL: estimated cost of 6 for {{.*}} mul <3 x i32>
-; THRPTALL: estimated cost of 16 for {{.*}} mul <4 x i32>
-; SIZEALL: estimated cost of 8 for {{.*}} mul <4 x i32>
-; THRPTALL: estimated cost of 20 for {{.*}} mul <5 x i32>
-; SIZEALL: estimated cost of 10 for {{.*}} mul <5 x i32>
 define amdgpu_kernel void @mul_i32() #0 {
+; ALL-LABEL: 'mul_i32'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i32 = mul i32 undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i32 = mul <2 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3i32 = mul <3 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i32 = mul <4 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5i32 = mul <5 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'mul_i32'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = mul i32 undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = mul <2 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i32 = mul <3 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = mul <4 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i32 = mul <5 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i32 = mul i32 undef, undef
   %v2i32 = mul <2 x i32> undef, undef
   %v3i32 = mul <3 x i32> undef, undef
@@ -24,18 +30,23 @@ define amdgpu_kernel void @mul_i32() #0 {
   ret void
 }
 
-; ALL-LABEL: 'mul_i64'
-; THRPTALL: estimated cost of 20 for {{.*}} mul i64
-; SIZEALL: estimated cost of 12 for {{.*}} mul i64
-; THRPTALL: estimated cost of 40 for {{.*}} mul <2 x i64>
-; SIZEALL: estimated cost of 24 for {{.*}} mul <2 x i64>
-; THRPTALL: estimated cost of 60 for {{.*}} mul <3 x i64>
-; SIZEALL: estimated cost of 36 for {{.*}} mul <3 x i64>
-; THRPTALL: estimated cost of 80 for {{.*}} mul <4 x i64>
-; SIZEALL: estimated cost of 48 for {{.*}} mul <4 x i64>
-; THRPTALL: estimated cost of 320 for {{.*}} mul <8 x i64>
-; SIZEALL: estimated cost of 192 for {{.*}} mul <8 x i64>
 define amdgpu_kernel void @mul_i64() #0 {
+; ALL-LABEL: 'mul_i64'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %i64 = mul i64 undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2i64 = mul <2 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v3i64 = mul <3 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v4i64 = mul <4 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v8i64 = mul <8 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'mul_i64'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %i64 = mul i64 undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2i64 = mul <2 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3i64 = mul <3 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4i64 = mul <4 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i64 = mul <8 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i64 = mul i64 undef, undef
   %v2i64 = mul <2 x i64> undef, undef
   %v3i64 = mul <3 x i64> undef, undef
@@ -44,18 +55,31 @@ define amdgpu_kernel void @mul_i64() #0 {
   ret void
 }
 
-; ALL-LABEL: 'mul_i16'
-; THRPTALL: estimated cost of 4 for {{.*}} mul i16
-; SIZEALL: estimated cost of 2 for {{.*}} mul i16
-; SLOW16: estimated cost of 8 for {{.*}} mul <2 x i16>
-; FAST16: estimated cost of 4 for {{.*}} mul <2 x i16>
-; SIZESLOW16: estimated cost of 4 for {{.*}} mul <2 x i16>
-; SIZEFAST16: estimated cost of 2 for {{.*}} mul <2 x i16>
-; SLOW16: estimated cost of 16 for {{.*}} mul <3 x i16>
-; FAST16: estimated cost of 8 for {{.*}} mul <3 x i16>
-; SIZESLOW16: estimated cost of 8 for {{.*}} mul <3 x i16>
-; SIZEFAST16: estimated cost of 4 for {{.*}} mul <3 x i16>
 define amdgpu_kernel void @mul_i16() #0 {
+; SLOW16-LABEL: 'mul_i16'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = mul i16 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i16 = mul <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3i16 = mul <3 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FAST16-LABEL: 'mul_i16'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = mul i16 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW16-SIZE-LABEL: 'mul_i16'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = mul i16 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FAST16-SIZE-LABEL: 'mul_i16'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = mul i16 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = mul <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = mul <3 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i16 = mul i16 undef, undef
   %v2i16 = mul <2 x i16> undef, undef
   %v3i16 = mul <3 x i16> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll b/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll
index 6357408e89148..8c71d508c0608 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll
@@ -1,18 +1,30 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=amdgcn-unknown-amdhsa -cost-model -cost-kind=throughput -analyze | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-unknown-amdhsa -cost-model -cost-kind=throughput -analyze | FileCheck -check-prefixes=ALL %s
+; RUN: opt < %s -mtriple=amdgcn-unknown-amdhsa -cost-model -cost-kind=code-size -analyze | FileCheck -check-prefixes=ALL-SIZE %s
 ; END.
 
 define i32 @reduce_i1(i32 %arg) {
-; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+; ALL-LABEL: 'reduce_i1'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; ALL-SIZE-LABEL: 'reduce_i1'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
   %V2   = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll b/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll
index 906ead77c092c..e530d0481e32b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll
@@ -1,18 +1,30 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=amdgcn-unknown-amdhsa -cost-model -cost-kind=throughput -analyze | FileCheck %s
+; RUN: opt < %s -mtriple=amdgcn-unknown-amdhsa -cost-model -cost-kind=throughput -analyze | FileCheck -check-prefixes=ALL %s
+; RUN: opt < %s -mtriple=amdgcn-unknown-amdhsa -cost-model -cost-kind=code-size -analyze | FileCheck -check-prefixes=ALL-SIZE %s
 ; END.
 
 define i32 @reduce_i1(i32 %arg) {
-; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+; ALL-LABEL: 'reduce_i1'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; ALL-SIZE-LABEL: 'reduce_i1'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
   %V2   = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
index f67a0fae8e127..f2a6a7902097f 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
@@ -1,18 +1,39 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,FAST64,FAST16 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SLOW64,SLOW16 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIZEALL,FAST16 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIZEALL,SLOW16 %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOW64 %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST64-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOW64-SIZE %s
 ; END.
 
-; ALL-LABEL: 'shl'
-; ALL: estimated cost of 1 for {{.*}} shl i32
-; FAST64: estimated cost of 2 for {{.*}} shl i64
-; SLOW64: estimated cost of 4 for {{.*}} shl i64
-; SIZEALL: estimated cost of 2 for {{.*}} shl i64
-; ALL: estimated cost of 1 for {{.*}} shl i16
-; SLOW16: estimated cost of 2 for {{.*}} shl <2 x i16>
-; FAST16: estimated cost of 1 for {{.*}} shl <2 x i16>
 define amdgpu_kernel void @shl() #0 {
+; FAST64-LABEL: 'shl'
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shl <2 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW64-LABEL: 'shl'
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i64 = shl i64 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = shl <2 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FAST64-SIZE-LABEL: 'shl'
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shl <2 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SLOW64-SIZE-LABEL: 'shl'
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = shl <2 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i32 = shl i32 undef, undef
   %i64 = shl i64 undef, undef
   %i16 = shl i16 undef, undef
@@ -20,15 +41,35 @@ define amdgpu_kernel void @shl() #0 {
   ret void
 }
 
-; ALL-LABEL: 'lshr'
-; ALL: estimated cost of 1 for {{.*}} lshr i32
-; FAST64: estimated cost of 2 for {{.*}} lshr i64
-; SLOW64: estimated cost of 4 for {{.*}} lshr i64
-; SIZEALL: estimated cost of 2 for {{.*}} lshr i64
-; ALL: estimated cost of 1 for {{.*}} lshr i16
-; SLOW16: estimated cost of 2 for {{.*}} lshr <2 x i16>
-; FAST16: estimated cost of 1 for {{.*}} lshr <2 x i16>
 define amdgpu_kernel void @lshr() #0 {
+; FAST64-LABEL: 'lshr'
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = lshr <2 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW64-LABEL: 'lshr'
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i64 = lshr i64 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = lshr <2 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FAST64-SIZE-LABEL: 'lshr'
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = lshr <2 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SLOW64-SIZE-LABEL: 'lshr'
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = lshr <2 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i32 = lshr i32 undef, undef
   %i64 = lshr i64 undef, undef
   %i16 = lshr i16 undef, undef
@@ -36,14 +77,35 @@ define amdgpu_kernel void @lshr() #0 {
   ret void
 }
 
-; ALL-LABEL: 'ashr'
-; ALL: estimated cost of 1 for {{.*}} ashr i32
-; FAST64: estimated cost of 2 for {{.*}} ashr i64
-; SLOW64: estimated cost of 4 for {{.*}} ashr i64
-; ALL: estimated cost of 1 for {{.*}} ashr i16
-; SLOW16: estimated cost of 2 for {{.*}} ashr <2 x i16>
-; FAST16: estimated cost of 1 for {{.*}} ashr <2 x i16>
 define amdgpu_kernel void @ashr() #0 {
+; FAST64-LABEL: 'ashr'
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = ashr <2 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SLOW64-LABEL: 'ashr'
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i64 = ashr i64 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = ashr <2 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FAST64-SIZE-LABEL: 'ashr'
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = ashr <2 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SLOW64-SIZE-LABEL: 'ashr'
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = ashr <2 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
   %i32 = ashr i32 undef, undef
   %i64 = ashr i64 undef, undef
   %i16 = ashr i16 undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
index cdbd9ec570abe..17c21affbf9a6 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
@@ -1,52 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -S | FileCheck -check-prefixes=GFX10 %s
-; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -cost-kind=code-size -S | FileCheck -check-prefixes=GFX10-CS %s
-; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -S | FileCheck -check-prefixes=GFX9 %s
-; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -cost-kind=code-size -S | FileCheck -check-prefixes=GFX9-CS %s
-; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji -S | FileCheck -check-prefixes=VI %s
-; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji -cost-kind=code-size -S | FileCheck -check-prefixes=VI-CS %s
+; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -S | FileCheck -check-prefixes=ALL,GFX9-10 %s
+; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -S | FileCheck -check-prefixes=ALL,GFX9-10 %s
+; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji -S | FileCheck -check-prefixes=ALL,VI %s
+; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -cost-kind=code-size -S | FileCheck -check-prefixes=ALL-SIZE,GFX9-10-SIZE %s
+; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -cost-kind=code-size -S | FileCheck -check-prefixes=ALL-SIZE,GFX9-10-SIZE %s
+; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji -cost-kind=code-size -S | FileCheck -check-prefixes=ALL-SIZE,VI-SIZE %s
 ; END.
 
 define amdgpu_kernel void @shufflevector_00_v2i16(<2 x i16> %vec0, <2 x i16> %vec1) {
-; GFX10-LABEL: 'shufflevector_00_v2i16'
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; GFX10-CS-LABEL: 'shufflevector_00_v2i16'
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; GFX9-LABEL: 'shufflevector_00_v2i16'
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; GFX9-CS-LABEL: 'shufflevector_00_v2i16'
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; GFX9-10-LABEL: 'shufflevector_00_v2i16'
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; VI-LABEL: 'shufflevector_00_v2i16'
 ; VI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
@@ -58,15 +28,25 @@ define amdgpu_kernel void @shufflevector_00_v2i16(<2 x i16> %vec0, <2 x i16> %ve
 ; VI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
 ; VI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; VI-CS-LABEL: 'shufflevector_00_v2i16'
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; GFX9-10-SIZE-LABEL: 'shufflevector_00_v2i16'
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; VI-SIZE-LABEL: 'shufflevector_00_v2i16'
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
   %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
@@ -80,35 +60,15 @@ define amdgpu_kernel void @shufflevector_00_v2i16(<2 x i16> %vec0, <2 x i16> %ve
 
 ; Should not assert
 define amdgpu_kernel void @shufflevector_xxx(<2 x i8> %vec0) {
-; GFX10-LABEL: 'shufflevector_xxx'
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; GFX10-CS-LABEL: 'shufflevector_xxx'
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; GFX9-LABEL: 'shufflevector_xxx'
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+; ALL-LABEL: 'shufflevector_xxx'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; GFX9-CS-LABEL: 'shufflevector_xxx'
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; VI-LABEL: 'shufflevector_xxx'
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; VI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; VI-CS-LABEL: 'shufflevector_xxx'
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-SIZE-LABEL: 'shufflevector_xxx'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -117,61 +77,19 @@ define amdgpu_kernel void @shufflevector_xxx(<2 x i8> %vec0) {
 
 ; Other shuffle cases
 define void @shuffle() {
-; GFX10-LABEL: 'shuffle'
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; GFX10-CS-LABEL: 'shuffle'
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; GFX9-LABEL: 'shuffle'
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; GFX9-CS-LABEL: 'shuffle'
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; GFX9-10-LABEL: 'shuffle'
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; VI-LABEL: 'shuffle'
 ; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
@@ -187,19 +105,33 @@ define void @shuffle() {
 ; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
 ; VI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; VI-CS-LABEL: 'shuffle'
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; GFX9-10-SIZE-LABEL: 'shuffle'
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; VI-SIZE-LABEL: 'shuffle'
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
   %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
@@ -216,113 +148,41 @@ define void @shuffle() {
 }
 
 define void @concat() {
-; GFX10-LABEL: 'concat'
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; GFX10-CS-LABEL: 'concat'
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX10-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; GFX9-LABEL: 'concat'
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; GFX9-CS-LABEL: 'concat'
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX9-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; VI-LABEL: 'concat'
-; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; VI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; VI-CS-LABEL: 'concat'
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; VI-CS-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-LABEL: 'concat'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'concat'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>