[llvm] b021bdb - [AArch64] Add codesize test coverage. NFC

Thu Feb 27 00:57:53 PST 2025

Author: David Green
Date: 2025-02-27T08:57:48Z
New Revision: b021bdbb3997ef6dd13980dc44f24754f15f3652

URL: https://github.com/llvm/llvm-project/commit/b021bdbb3997ef6dd13980dc44f24754f15f3652
DIFF: https://github.com/llvm/llvm-project/commit/b021bdbb3997ef6dd13980dc44f24754f15f3652.diff

LOG: [AArch64] Add codesize test coverage. NFC

This adds some basic codesize test coverage for a number of instructions. Much of
the results returned are not very accurate yet, especially around larger vector
types but also some basic operations.

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/AArch64/aggregates.ll
    llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
    llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
    llvm/test/Analysis/CostModel/AArch64/arith.ll
    llvm/test/Analysis/CostModel/AArch64/bitreverse.ll
    llvm/test/Analysis/CostModel/AArch64/fshl.ll
    llvm/test/Analysis/CostModel/AArch64/fshr.ll
    llvm/test/Analysis/CostModel/AArch64/gep.ll
    llvm/test/Analysis/CostModel/AArch64/min-max.ll
    llvm/test/Analysis/CostModel/AArch64/mul.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-add.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
    llvm/test/Analysis/CostModel/AArch64/select.ll
    llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll
    llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/AArch64/aggregates.ll b/llvm/test/Analysis/CostModel/AArch64/aggregates.ll
index ebe29716ed671..75e1e655c9d52 100644

--- a/llvm/test/Analysis/CostModel/AArch64/aggregates.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/aggregates.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=THROUGHPUT
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=latency 2>&1 -disable-output | FileCheck %s --check-prefix=LATENCY
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=CODESIZE
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=THROUGHPUT
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=latency 2>&1 -disable-output | FileCheck %s --check-prefix=LATENCY
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=CODESIZE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 

diff  --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index b329a5607acb9..3e9d2259cf5f4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -1,22 +1,37 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefix=SIZE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define i32 @fadd(i32 %arg) {
-; CHECK-LABEL: 'fadd'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fadd half undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fadd'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fadd half undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <4 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <8 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd <16 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <2 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fadd'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fadd half undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <4 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <8 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F16 = fadd <16 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <2 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fadd <8 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fadd double undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fadd <4 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = fadd half undef, undef
   %V4F16 = fadd <4 x half> undef, undef
@@ -36,19 +51,33 @@ define i32 @fadd(i32 %arg) {
 }
 
 define i32 @fsub(i32 %arg) {
-; CHECK-LABEL: 'fsub'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fsub'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub <16 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fsub'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F16 = fsub <16 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = fsub half undef, undef
   %V4F16 = fsub <4 x half> undef, undef
@@ -68,18 +97,31 @@ define i32 @fsub(i32 %arg) {
 }
 
 define i32 @fneg_idiom(i32 %arg) {
-; CHECK-LABEL: 'fneg_idiom'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half 0xH8000, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> splat (float -0.000000e+00), undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fneg_idiom'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half 0xH8000, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> splat (float -0.000000e+00), undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fneg_idiom'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half 0xH8000, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> splat (float -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = fsub half -0.0, undef
   %V4F16 = fsub <4 x half> <half -0.0, half -0.0, half -0.0, half -0.0>, undef
@@ -98,20 +140,35 @@ define i32 @fneg_idiom(i32 %arg) {
 }
 
 define i32 @fneg(i32 %arg) {
-; CHECK-LABEL: 'fneg'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fneg'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fneg'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F16 = fneg <16 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = fneg half undef
   %V2F16 = fneg <2 x half> undef
@@ -132,32 +189,59 @@ define i32 @fneg(i32 %arg) {
 }
 
 define i32 @fmulfneg(i32 %arg) {
-; CHECK-LABEL: 'fmulfneg'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half %F16, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> %V2F16, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16M = fmul <4 x half> %V4F16, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16M = fmul <8 x half> %V8F16, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> %V16F16, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float %F32, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> %V2F32, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> %V4F32, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> %V8F32, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double %F64, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> %V2F64, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64M = fmul <4 x double> %V4F64, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fmulfneg'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half %F16, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> %V2F16, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16M = fmul <4 x half> %V4F16, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16M = fmul <8 x half> %V8F16, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> %V16F16, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float %F32, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> %V2F32, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> %V4F32, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> %V8F32, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double %F64, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> %V2F64, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64M = fmul <4 x double> %V4F64, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fmulfneg'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16M = fmul half %F16, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16M = fmul <2 x half> %V2F16, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16M = fmul <4 x half> %V4F16, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16M = fmul <8 x half> %V8F16, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F16 = fneg <16 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F16M = fmul <16 x half> %V16F16, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32M = fmul float %F32, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32M = fmul <2 x float> %V2F32, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32M = fmul <4 x float> %V4F32, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32M = fmul <8 x float> %V8F32, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64M = fmul double %F64, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64M = fmul <2 x double> %V2F64, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64M = fmul <4 x double> %V4F64, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = fneg half undef
   %F16M = fmul half %F16, undef
@@ -190,32 +274,59 @@ define i32 @fmulfneg(i32 %arg) {
 }
 
 define i32 @fnegfmul(i32 %arg) {
-; CHECK-LABEL: 'fnegfmul'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half %F16M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> %V2F16M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16M = fmul <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> %V4F16M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16M = fmul <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> %V8F16M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> %V16F16M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float %F32M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> %V2F32M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> %V4F32M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> %V8F32M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double %F64M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> %V2F64M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64M = fmul <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> %V4F64M
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fnegfmul'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half %F16M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> %V2F16M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16M = fmul <4 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> %V4F16M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16M = fmul <8 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> %V8F16M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> %V16F16M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float %F32M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> %V2F32M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> %V4F32M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> %V8F32M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double %F64M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> %V2F64M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64M = fmul <4 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> %V4F64M
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fnegfmul'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16M = fmul half undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half %F16M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16M = fmul <2 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> %V2F16M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16M = fmul <4 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> %V4F16M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16M = fmul <8 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> %V8F16M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F16M = fmul <16 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F16 = fneg <16 x half> %V16F16M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32M = fmul float undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float %F32M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32M = fmul <2 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> %V2F32M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32M = fmul <4 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> %V4F32M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32M = fmul <8 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fneg <8 x float> %V8F32M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64M = fmul double undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double %F64M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64M = fmul <2 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> %V2F64M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64M = fmul <4 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fneg <4 x double> %V4F64M
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16M = fmul half undef, undef
   %F16 = fneg half %F16M
@@ -248,19 +359,33 @@ define i32 @fnegfmul(i32 %arg) {
 }
 
 define i32 @fmul(i32 %arg) {
-; CHECK-LABEL: 'fmul'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fmul'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fmul'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fmul half undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fmul <4 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fmul <8 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F16 = fmul <16 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fmul float undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fmul <2 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fmul <8 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fmul <4 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = fmul half undef, undef
   %V4F16 = fmul <4 x half> undef, undef
@@ -280,19 +405,33 @@ define i32 @fmul(i32 %arg) {
 }
 
 define i32 @fdiv(i32 %arg) {
-; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fdiv'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fdiv'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F16 = fdiv half undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F16 = fdiv <4 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F16 = fdiv <8 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = fdiv <2 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = fdiv half undef, undef
   %V4F16 = fdiv <4 x half> undef, undef
@@ -312,19 +451,33 @@ define i32 @fdiv(i32 %arg) {
 }
 
 define i32 @frem(i32 %arg) {
-; CHECK-LABEL: 'frem'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F16 = frem half undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4F16 = frem <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8F16 = frem <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V16F16 = frem <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = frem float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2F32 = frem <2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4F32 = frem <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8F32 = frem <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = frem double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2F64 = frem <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4F64 = frem <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'frem'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F16 = frem half undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4F16 = frem <4 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8F16 = frem <8 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V16F16 = frem <16 x half> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = frem float undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2F32 = frem <2 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4F32 = frem <4 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8F32 = frem <8 x float> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = frem double undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2F64 = frem <2 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4F64 = frem <4 x double> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'frem'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F16 = frem half undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F16 = frem <4 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F16 = frem <8 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = frem <16 x half> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = frem <2 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = frem half undef, undef
   %V4F16 = frem <4 x half> undef, undef
@@ -344,19 +497,33 @@ define i32 @frem(i32 %arg) {
 }
 
 define i32 @fsqrt(i32 %arg) {
-; CHECK-LABEL: 'fsqrt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.sqrt.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fsqrt'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.sqrt.f16(half undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fsqrt'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.sqrt.f16(half undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = call half @llvm.sqrt.f16(half undef)
   %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
@@ -376,19 +543,33 @@ define i32 @fsqrt(i32 %arg) {
 }
 
 define i32 @fabs(i32 %arg) {
-; CHECK-LABEL: 'fabs'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fabs.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fabs'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fabs.f16(half undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fabs'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fabs.f16(half undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = call half @llvm.fabs.f16(half undef)
   %V4F16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
@@ -408,19 +589,33 @@ define i32 @fabs(i32 %arg) {
 }
 
 define i32 @fcopysign(i32 %arg) {
-; CHECK-LABEL: 'fcopysign'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fcopysign'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fcopysign'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = call half @llvm.copysign.f16(half undef, half undef)
   %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
@@ -440,19 +635,33 @@ define i32 @fcopysign(i32 %arg) {
 }
 
 define i32 @fma(i32 %arg) {
-; CHECK-LABEL: 'fma'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call float @llvm.fma.f32(float undef, float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; RECIP-LABEL: 'fma'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call float @llvm.fma.f32(float undef, float undef, float undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'fma'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call float @llvm.fma.f32(float undef, float undef, float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F16 = call float @llvm.fma.f32(float undef, float undef, float undef)
   %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
index 303bcfa289577..afb75f288d0c1 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
@@ -1,71 +1,135 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @extaddv1(i8 %i8, i16 %i16, i32 %i32, i64 %i64)  {
-; CHECK-LABEL: 'extaddv1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add i16 %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add i16 %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add i16 %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add i16 %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add i32 %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add i32 %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add i32 %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add i32 %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = add i64 %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = add i64 %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_64 = zext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = add i64 %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_64 = zext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_64 = zext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = add i64 %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add i32 %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add i32 %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add i32 %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add i32 %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = add i64 %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = add i64 %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_64 = zext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = add i64 %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_64 = zext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_64 = zext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = add i64 %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = add i64 %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = add i64 %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = add i64 %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = add i64 %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extaddv1'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add i16 %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add i16 %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add i16 %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add i16 %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add i32 %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add i32 %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add i32 %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add i32 %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = add i64 %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = add i64 %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_64 = zext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = add i64 %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_64 = zext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_64 = zext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = add i64 %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add i32 %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add i32 %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add i32 %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add i32 %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = add i64 %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = add i64 %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_64 = zext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = add i64 %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_64 = zext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_64 = zext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = add i64 %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = add i64 %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = add i64 %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = add i64 %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = add i64 %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extaddv1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add i16 %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add i16 %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add i16 %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add i16 %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add i32 %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add i32 %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add i32 %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add i32 %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = add i64 %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = add i64 %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_64 = zext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = add i64 %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_64 = zext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_64 = zext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = add i64 %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add i32 %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add i32 %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add i32 %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add i32 %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = add i64 %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = add i64 %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_64 = zext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = add i64 %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_64 = zext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_64 = zext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = add i64 %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = add i64 %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = add i64 %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = add i64 %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = add i64 %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext i8 %i8 to i16
   %asw_8_16 = add i16 %i16, %sw_8_16
@@ -143,68 +207,131 @@ define void @extaddv1(i8 %i8, i16 %i16, i32 %i32, i64 %i64)  {
 }
 
 define void @extaddv2(<2 x i8> %i8, <2 x i16> %i16, <2 x i32> %i32, <2 x i64> %i64)  {
-; CHECK-LABEL: 'extaddv2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <2 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <2 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <2 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <2 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add <2 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add <2 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add <2 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add <2 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = add <2 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = add <2 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = add <2 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = add <2 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add <2 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add <2 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add <2 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add <2 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = add <2 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = add <2 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = add <2 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = add <2 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = add <2 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = add <2 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = add <2 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = add <2 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extaddv2'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <2 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <2 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <2 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <2 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add <2 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add <2 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add <2 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add <2 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = add <2 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = add <2 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = add <2 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = add <2 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add <2 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add <2 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add <2 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add <2 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = add <2 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = add <2 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = add <2 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = add <2 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = add <2 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = add <2 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = add <2 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = add <2 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extaddv2'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <2 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <2 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <2 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <2 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add <2 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add <2 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add <2 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add <2 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = add <2 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = add <2 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = add <2 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = add <2 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add <2 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add <2 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add <2 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add <2 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = add <2 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = add <2 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = add <2 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = add <2 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = add <2 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = add <2 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = add <2 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = add <2 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
   %asw_8_16 = add <2 x i16> %i16, %sw_8_16
@@ -282,68 +409,131 @@ define void @extaddv2(<2 x i8> %i8, <2 x i16> %i16, <2 x i32> %i32, <2 x i64> %i
 }
 
 define void @extaddv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i64)  {
-; CHECK-LABEL: 'extaddv4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <4 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <4 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <4 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <4 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add <4 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add <4 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add <4 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add <4 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_64 = add <4 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_64 = add <4 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_64 = add <4 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_64 = add <4 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add <4 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add <4 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add <4 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add <4 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_64 = add <4 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_64 = add <4 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_64 = add <4 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_64 = add <4 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_32_64 = add <4 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_32_64 = add <4 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_32_64 = add <4 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_32_64 = add <4 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extaddv4'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <4 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <4 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <4 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <4 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add <4 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add <4 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add <4 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add <4 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_64 = add <4 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_64 = add <4 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_64 = add <4 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_64 = add <4 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add <4 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add <4 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add <4 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add <4 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_64 = add <4 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_64 = add <4 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_64 = add <4 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_64 = add <4 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_32_64 = add <4 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_32_64 = add <4 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_32_64 = add <4 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_32_64 = add <4 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extaddv4'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <4 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <4 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <4 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <4 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add <4 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add <4 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add <4 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add <4 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = add <4 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = add <4 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = add <4 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = add <4 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add <4 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add <4 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add <4 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add <4 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = add <4 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = add <4 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = add <4 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = add <4 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = add <4 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = add <4 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = add <4 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = add <4 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
   %asw_8_16 = add <4 x i16> %i16, %sw_8_16
@@ -421,68 +611,131 @@ define void @extaddv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i
 }
 
 define void @extaddv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i64)  {
-; CHECK-LABEL: 'extaddv8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <8 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <8 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <8 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <8 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_32 = add <8 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_32 = add <8 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_32 = add <8 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_32 = add <8 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_64 = add <8 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_64 = add <8 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_64 = add <8 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_64 = add <8 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_32 = add <8 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_32 = add <8 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_32 = add <8 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_32 = add <8 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_64 = add <8 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_64 = add <8 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_64 = add <8 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_64 = add <8 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_32_64 = add <8 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_32_64 = add <8 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_32_64 = add <8 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_32_64 = add <8 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extaddv8'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <8 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <8 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <8 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <8 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_32 = add <8 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_32 = add <8 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_32 = add <8 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_32 = add <8 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_64 = add <8 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_64 = add <8 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_64 = add <8 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_64 = add <8 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_32 = add <8 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_32 = add <8 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_32 = add <8 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_32 = add <8 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_64 = add <8 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_64 = add <8 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_64 = add <8 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_64 = add <8 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_32_64 = add <8 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_32_64 = add <8 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_32_64 = add <8 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_32_64 = add <8 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extaddv8'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <8 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <8 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <8 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <8 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add <8 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add <8 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add <8 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add <8 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = add <8 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = add <8 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = add <8 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = add <8 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add <8 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add <8 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add <8 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add <8 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = add <8 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = add <8 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = add <8 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = add <8 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = add <8 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = add <8 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = add <8 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = add <8 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
   %asw_8_16 = add <8 x i16> %i16, %sw_8_16
@@ -560,68 +813,131 @@ define void @extaddv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i
 }
 
 define void @extaddv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i64> %i64)  {
-; CHECK-LABEL: 'extaddv16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_16 = add <16 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_16 = add <16 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_16 = add <16 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_16 = add <16 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_32 = add <16 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_32 = add <16 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_32 = add <16 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_32 = add <16 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_8_64 = add <16 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_8_64 = add <16 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_8_64 = add <16 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_8_64 = add <16 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_32 = add <16 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_32 = add <16 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_32 = add <16 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_32 = add <16 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_16_64 = add <16 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_16_64 = add <16 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_16_64 = add <16 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_16_64 = add <16 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_32_64 = add <16 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_32_64 = add <16 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_32_64 = add <16 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_32_64 = add <16 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extaddv16'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_16 = add <16 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_16 = add <16 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_16 = add <16 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_16 = add <16 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_32 = add <16 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_32 = add <16 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_32 = add <16 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_32 = add <16 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_8_64 = add <16 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_8_64 = add <16 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_8_64 = add <16 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_8_64 = add <16 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_32 = add <16 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_32 = add <16 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_32 = add <16 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_32 = add <16 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_16_64 = add <16 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_16_64 = add <16 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_16_64 = add <16 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_16_64 = add <16 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_32_64 = add <16 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_32_64 = add <16 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_32_64 = add <16 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_32_64 = add <16 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extaddv16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = add <16 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = add <16 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = add <16 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = add <16 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = add <16 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = add <16 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = add <16 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = add <16 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = add <16 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = add <16 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = add <16 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = add <16 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = add <16 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = add <16 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = add <16 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = add <16 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = add <16 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = add <16 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = add <16 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = add <16 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = add <16 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = add <16 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = add <16 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = add <16 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
   %asw_8_16 = add <16 x i16> %i16, %sw_8_16
@@ -699,68 +1015,131 @@ define void @extaddv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6
 }
 
 define void @extsubv1(i8 %i8, i16 %i16, i32 %i32, i64 %i64)  {
-; CHECK-LABEL: 'extsubv1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub i16 %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub i16 %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub i16 %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub i16 %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub i32 %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub i32 %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub i32 %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub i32 %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = sub i64 %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = sub i64 %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_64 = zext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = sub i64 %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_64 = zext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_64 = zext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = sub i64 %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub i32 %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub i32 %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub i32 %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub i32 %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = sub i64 %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = sub i64 %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_64 = zext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = sub i64 %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_64 = zext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_64 = zext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = sub i64 %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = sub i64 %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = sub i64 %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = sub i64 %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = sub i64 %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extsubv1'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub i16 %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub i16 %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub i16 %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub i16 %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub i32 %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub i32 %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub i32 %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub i32 %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = sub i64 %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = sub i64 %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_64 = zext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = sub i64 %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_64 = zext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_64 = zext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = sub i64 %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub i32 %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub i32 %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub i32 %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub i32 %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = sub i64 %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = sub i64 %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_64 = zext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = sub i64 %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_64 = zext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_64 = zext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = sub i64 %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = sub i64 %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = sub i64 %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = sub i64 %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = sub i64 %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extsubv1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub i16 %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub i16 %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub i16 %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub i16 %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub i32 %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub i32 %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub i32 %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub i32 %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = sub i64 %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = sub i64 %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_64 = zext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = sub i64 %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_64 = zext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_64 = zext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = sub i64 %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub i32 %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub i32 %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub i32 %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub i32 %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = sub i64 %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = sub i64 %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_64 = zext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = sub i64 %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_64 = zext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_64 = zext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = sub i64 %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = sub i64 %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = sub i64 %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = sub i64 %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = sub i64 %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext i8 %i8 to i16
   %asw_8_16 = sub i16 %i16, %sw_8_16
@@ -838,68 +1217,131 @@ define void @extsubv1(i8 %i8, i16 %i16, i32 %i32, i64 %i64)  {
 }
 
 define void @extsubv2(<2 x i8> %i8, <2 x i16> %i16, <2 x i32> %i32, <2 x i64> %i64)  {
-; CHECK-LABEL: 'extsubv2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <2 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <2 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <2 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <2 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub <2 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub <2 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub <2 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub <2 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = sub <2 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = sub <2 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = sub <2 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = sub <2 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub <2 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub <2 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub <2 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub <2 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = sub <2 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = sub <2 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = sub <2 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = sub <2 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = sub <2 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = sub <2 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = sub <2 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = sub <2 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extsubv2'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <2 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <2 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <2 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <2 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub <2 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub <2 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub <2 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub <2 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = sub <2 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = sub <2 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = sub <2 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = sub <2 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub <2 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub <2 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub <2 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub <2 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = sub <2 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = sub <2 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = sub <2 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = sub <2 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = sub <2 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = sub <2 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = sub <2 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = sub <2 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extsubv2'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <2 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <2 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <2 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <2 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub <2 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub <2 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub <2 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub <2 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = sub <2 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = sub <2 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = sub <2 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = sub <2 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub <2 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub <2 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub <2 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub <2 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = sub <2 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = sub <2 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = sub <2 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = sub <2 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = sub <2 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = sub <2 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = sub <2 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = sub <2 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
   %asw_8_16 = sub <2 x i16> %i16, %sw_8_16
@@ -977,68 +1419,131 @@ define void @extsubv2(<2 x i8> %i8, <2 x i16> %i16, <2 x i32> %i32, <2 x i64> %i
 }
 
 define void @extsubv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i64)  {
-; CHECK-LABEL: 'extsubv4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <4 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <4 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <4 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <4 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub <4 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub <4 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub <4 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub <4 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_64 = sub <4 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_64 = sub <4 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_64 = sub <4 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_64 = sub <4 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub <4 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub <4 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub <4 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub <4 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_64 = sub <4 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_64 = sub <4 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_64 = sub <4 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_64 = sub <4 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_32_64 = sub <4 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_32_64 = sub <4 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_32_64 = sub <4 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_32_64 = sub <4 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extsubv4'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <4 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <4 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <4 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <4 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub <4 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub <4 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub <4 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub <4 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_64 = sub <4 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_64 = sub <4 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_64 = sub <4 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_64 = sub <4 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub <4 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub <4 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub <4 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub <4 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_64 = sub <4 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_64 = sub <4 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_64 = sub <4 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_64 = sub <4 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_32_64 = sub <4 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_32_64 = sub <4 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_32_64 = sub <4 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_32_64 = sub <4 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extsubv4'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <4 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <4 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <4 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <4 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub <4 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub <4 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub <4 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub <4 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = sub <4 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = sub <4 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = sub <4 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = sub <4 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub <4 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub <4 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub <4 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub <4 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = sub <4 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = sub <4 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = sub <4 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = sub <4 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = sub <4 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = sub <4 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = sub <4 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = sub <4 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
   %asw_8_16 = sub <4 x i16> %i16, %sw_8_16
@@ -1116,68 +1621,131 @@ define void @extsubv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i
 }
 
 define void @extsubv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i64)  {
-; CHECK-LABEL: 'extsubv8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <8 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <8 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <8 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <8 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_32 = sub <8 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_32 = sub <8 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_32 = sub <8 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_32 = sub <8 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_64 = sub <8 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_64 = sub <8 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_64 = sub <8 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_64 = sub <8 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_32 = sub <8 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_32 = sub <8 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_32 = sub <8 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_32 = sub <8 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_64 = sub <8 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_64 = sub <8 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_64 = sub <8 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_64 = sub <8 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_32_64 = sub <8 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_32_64 = sub <8 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_32_64 = sub <8 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_32_64 = sub <8 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extsubv8'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <8 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <8 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <8 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <8 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_32 = sub <8 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_32 = sub <8 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_32 = sub <8 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_32 = sub <8 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_64 = sub <8 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_64 = sub <8 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_64 = sub <8 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_64 = sub <8 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_32 = sub <8 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_32 = sub <8 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_32 = sub <8 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_32 = sub <8 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_64 = sub <8 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_64 = sub <8 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_64 = sub <8 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_64 = sub <8 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_32_64 = sub <8 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_32_64 = sub <8 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_32_64 = sub <8 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_32_64 = sub <8 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extsubv8'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <8 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <8 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <8 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <8 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub <8 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub <8 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub <8 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub <8 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = sub <8 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = sub <8 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = sub <8 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = sub <8 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub <8 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub <8 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub <8 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub <8 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = sub <8 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = sub <8 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = sub <8 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = sub <8 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = sub <8 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = sub <8 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = sub <8 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = sub <8 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
   %asw_8_16 = sub <8 x i16> %i16, %sw_8_16
@@ -1255,68 +1823,131 @@ define void @extsubv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i
 }
 
 define void @extsubv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i64> %i64)  {
-; CHECK-LABEL: 'extsubv16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_16 = sub <16 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_16 = sub <16 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_16 = sub <16 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_16 = sub <16 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_32 = sub <16 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_32 = sub <16 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_32 = sub <16 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_32 = sub <16 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_8_64 = sub <16 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_8_64 = sub <16 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_8_64 = sub <16 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_8_64 = sub <16 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_32 = sub <16 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_32 = sub <16 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_32 = sub <16 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_32 = sub <16 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_16_64 = sub <16 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_16_64 = sub <16 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_16_64 = sub <16 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_16_64 = sub <16 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_32_64 = sub <16 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_32_64 = sub <16 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_32_64 = sub <16 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_32_64 = sub <16 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extsubv16'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_16 = sub <16 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_16 = sub <16 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_16 = sub <16 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_16 = sub <16 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_32 = sub <16 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_32 = sub <16 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_32 = sub <16 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_32 = sub <16 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_8_64 = sub <16 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_8_64 = sub <16 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_8_64 = sub <16 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_8_64 = sub <16 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_32 = sub <16 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_32 = sub <16 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_32 = sub <16 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_32 = sub <16 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_16_64 = sub <16 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_16_64 = sub <16 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_16_64 = sub <16 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_16_64 = sub <16 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asw_32_64 = sub <16 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_32_64 = sub <16 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azw_32_64 = sub <16 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_32_64 = sub <16 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extsubv16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = sub <16 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = sub <16 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = sub <16 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = sub <16 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = sub <16 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = sub <16 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = sub <16 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = sub <16 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = sub <16 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = sub <16 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = sub <16 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = sub <16 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = sub <16 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = sub <16 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = sub <16 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = sub <16 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = sub <16 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = sub <16 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = sub <16 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = sub <16 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = sub <16 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = sub <16 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = sub <16 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = sub <16 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
   %asw_8_16 = sub <16 x i16> %i16, %sw_8_16
@@ -1394,68 +2025,131 @@ define void @extsubv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6
 }
 
 define void @extmulv1(i8 %i8, i16 %i16, i32 %i32, i64 %i64)  {
-; CHECK-LABEL: 'extmulv1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul i16 %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul i16 %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul i16 %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext i8 %i8 to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul i16 %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul i32 %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul i32 %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul i32 %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext i8 %i8 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul i32 %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = mul i64 %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = mul i64 %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_64 = zext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = mul i64 %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_64 = zext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_64 = zext i8 %i8 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = mul i64 %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul i32 %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul i32 %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul i32 %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext i16 %i16 to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul i32 %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = mul i64 %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = mul i64 %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_64 = zext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = mul i64 %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_64 = zext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_64 = zext i16 %i16 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = mul i64 %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = mul i64 %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul i64 %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = mul i64 %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext i32 %i32 to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul i64 %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extmulv1'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul i16 %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul i16 %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul i16 %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext i8 %i8 to i16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul i16 %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul i32 %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul i32 %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul i32 %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext i8 %i8 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul i32 %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = mul i64 %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = mul i64 %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_64 = zext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = mul i64 %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_64 = zext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_64 = zext i8 %i8 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = mul i64 %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul i32 %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul i32 %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul i32 %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext i16 %i16 to i32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul i32 %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = mul i64 %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = mul i64 %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_64 = zext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = mul i64 %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_64 = zext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_64 = zext i16 %i16 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = mul i64 %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = mul i64 %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul i64 %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = mul i64 %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext i32 %i32 to i64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul i64 %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extmulv1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul i16 %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul i16 %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul i16 %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext i8 %i8 to i16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul i16 %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul i32 %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul i32 %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul i32 %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext i8 %i8 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul i32 %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = mul i64 %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = mul i64 %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_8_64 = zext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = mul i64 %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_64 = zext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_64 = zext i8 %i8 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = mul i64 %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul i32 %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul i32 %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul i32 %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext i16 %i16 to i32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul i32 %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = mul i64 %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = mul i64 %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_16_64 = zext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = mul i64 %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_64 = zext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_64 = zext i16 %i16 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = mul i64 %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = mul i64 %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_32_64 = sext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_32_64 = sext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul i64 %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zw_32_64 = zext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = mul i64 %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext i32 %i32 to i64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul i64 %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext i8 %i8 to i16
   %asw_8_16 = mul i16 %i16, %sw_8_16
@@ -1533,68 +2227,131 @@ define void @extmulv1(i8 %i8, i16 %i16, i32 %i32, i64 %i64)  {
 }
 
 define void @extmulv2(<2 x i8> %i8, <2 x i16> %i16, <2 x i32> %i32, <2 x i64> %i64)  {
-; CHECK-LABEL: 'extmulv2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <2 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <2 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <2 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <2 x i8> %i8 to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <2 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul <2 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul <2 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul <2 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul <2 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asw_8_64 = mul <2 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asl_8_64 = mul <2 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azw_8_64 = mul <2 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azl_8_64 = mul <2 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul <2 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul <2 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul <2 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul <2 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asw_16_64 = mul <2 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asl_16_64 = mul <2 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azw_16_64 = mul <2 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azl_16_64 = mul <2 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asw_32_64 = mul <2 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul <2 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azw_32_64 = mul <2 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul <2 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extmulv2'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <2 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <2 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <2 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <2 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul <2 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul <2 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul <2 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul <2 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asw_8_64 = mul <2 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asl_8_64 = mul <2 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azw_8_64 = mul <2 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azl_8_64 = mul <2 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul <2 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul <2 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul <2 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul <2 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asw_16_64 = mul <2 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asl_16_64 = mul <2 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azw_16_64 = mul <2 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azl_16_64 = mul <2 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %asw_32_64 = mul <2 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul <2 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %azw_32_64 = mul <2 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul <2 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extmulv2'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <2 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <2 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <2 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <2 x i8> %i8 to <2 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <2 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul <2 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul <2 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul <2 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <2 x i8> %i8 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul <2 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = mul <2 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = mul <2 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = mul <2 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <2 x i8> %i8 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = mul <2 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul <2 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_32 = sext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul <2 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul <2 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_32 = zext <2 x i16> %i16 to <2 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul <2 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = mul <2 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = mul <2 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = mul <2 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <2 x i16> %i16 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = mul <2 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = mul <2 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul <2 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = mul <2 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <2 x i32> %i32 to <2 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul <2 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <2 x i8> %i8 to <2 x i16>
   %asw_8_16 = mul <2 x i16> %i16, %sw_8_16
@@ -1672,68 +2429,131 @@ define void @extmulv2(<2 x i8> %i8, <2 x i16> %i16, <2 x i32> %i32, <2 x i64> %i
 }
 
 define void @extmulv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i64)  {
-; CHECK-LABEL: 'extmulv4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <4 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <4 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <4 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <4 x i8> %i8 to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <4 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul <4 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul <4 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul <4 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul <4 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asw_8_64 = mul <4 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asl_8_64 = mul <4 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azw_8_64 = mul <4 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azl_8_64 = mul <4 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul <4 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul <4 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul <4 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul <4 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asw_16_64 = mul <4 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asl_16_64 = mul <4 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azw_16_64 = mul <4 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azl_16_64 = mul <4 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asw_32_64 = mul <4 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_32_64 = mul <4 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azw_32_64 = mul <4 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_32_64 = mul <4 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extmulv4'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <4 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <4 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <4 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <4 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul <4 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul <4 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul <4 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul <4 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asw_8_64 = mul <4 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asl_8_64 = mul <4 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azw_8_64 = mul <4 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azl_8_64 = mul <4 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul <4 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul <4 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul <4 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul <4 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asw_16_64 = mul <4 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asl_16_64 = mul <4 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azw_16_64 = mul <4 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azl_16_64 = mul <4 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %asw_32_64 = mul <4 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_32_64 = mul <4 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %azw_32_64 = mul <4 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_32_64 = mul <4 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extmulv4'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <4 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_16 = sext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <4 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <4 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_16 = zext <4 x i8> %i8 to <4 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <4 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul <4 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul <4 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul <4 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <4 x i8> %i8 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul <4 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = mul <4 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = mul <4 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = mul <4 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <4 x i8> %i8 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = mul <4 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul <4 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul <4 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul <4 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <4 x i16> %i16 to <4 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul <4 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = mul <4 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = mul <4 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = mul <4 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <4 x i16> %i16 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = mul <4 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = mul <4 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul <4 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = mul <4 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <4 x i32> %i32 to <4 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul <4 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <4 x i8> %i8 to <4 x i16>
   %asw_8_16 = mul <4 x i16> %i16, %sw_8_16
@@ -1811,68 +2631,131 @@ define void @extmulv4(<4 x i8> %i8, <4 x i16> %i16, <4 x i32> %i32, <4 x i64> %i
 }
 
 define void @extmulv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i64)  {
-; CHECK-LABEL: 'extmulv8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <8 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <8 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_32 = mul <8 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_32 = mul <8 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_32 = mul <8 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_32 = mul <8 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asw_8_64 = mul <8 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asl_8_64 = mul <8 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azw_8_64 = mul <8 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azl_8_64 = mul <8 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_32 = mul <8 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_32 = mul <8 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_32 = mul <8 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_32 = mul <8 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asw_16_64 = mul <8 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asl_16_64 = mul <8 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azw_16_64 = mul <8 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azl_16_64 = mul <8 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asw_32_64 = mul <8 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_32_64 = mul <8 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azw_32_64 = mul <8 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_32_64 = mul <8 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extmulv8'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <8 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <8 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_32 = mul <8 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_32 = mul <8 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_32 = mul <8 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_32 = mul <8 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asw_8_64 = mul <8 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asl_8_64 = mul <8 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azw_8_64 = mul <8 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azl_8_64 = mul <8 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_16_32 = mul <8 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_16_32 = mul <8 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_16_32 = mul <8 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_16_32 = mul <8 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asw_16_64 = mul <8 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asl_16_64 = mul <8 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azw_16_64 = mul <8 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azl_16_64 = mul <8 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %asw_32_64 = mul <8 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_32_64 = mul <8 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %azw_32_64 = mul <8 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_32_64 = mul <8 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extmulv8'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <8 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <8 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul <8 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul <8 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul <8 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <8 x i8> %i8 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul <8 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = mul <8 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = mul <8 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = mul <8 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <8 x i8> %i8 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = mul <8 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul <8 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul <8 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul <8 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <8 x i16> %i16 to <8 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul <8 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = mul <8 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = mul <8 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = mul <8 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <8 x i16> %i16 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = mul <8 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = mul <8 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul <8 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = mul <8 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <8 x i32> %i32 to <8 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul <8 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <8 x i8> %i8 to <8 x i16>
   %asw_8_16 = mul <8 x i16> %i16, %sw_8_16
@@ -1950,68 +2833,131 @@ define void @extmulv8(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i
 }
 
 define void @extmulv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i64> %i64)  {
-; CHECK-LABEL: 'extmulv16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_16 = mul <16 x i16> %i16, %sw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_16 = mul <16 x i16> %sl1_8_16, %sl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_16 = mul <16 x i16> %i16, %zw_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_16 = mul <16 x i16> %zl1_8_16, %zl2_8_16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_32 = mul <16 x i32> %i32, %sw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_32 = mul <16 x i32> %sl1_8_32, %sl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_32 = mul <16 x i32> %i32, %zw_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_32 = mul <16 x i32> %zl1_8_32, %zl2_8_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asw_8_64 = mul <16 x i64> %i64, %sw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asl_8_64 = mul <16 x i64> %sl1_8_64, %sl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azw_8_64 = mul <16 x i64> %i64, %zw_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azl_8_64 = mul <16 x i64> %zl1_8_64, %zl2_8_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_32 = mul <16 x i32> %i32, %sw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_32 = mul <16 x i32> %sl1_16_32, %sl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_32 = mul <16 x i32> %i32, %zw_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_32 = mul <16 x i32> %zl1_16_32, %zl2_16_32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asw_16_64 = mul <16 x i64> %i64, %sw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asl_16_64 = mul <16 x i64> %sl1_16_64, %sl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azw_16_64 = mul <16 x i64> %i64, %zw_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azl_16_64 = mul <16 x i64> %zl1_16_64, %zl2_16_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asw_32_64 = mul <16 x i64> %i64, %sw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_32_64 = mul <16 x i64> %sl1_32_64, %sl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azw_32_64 = mul <16 x i64> %i64, %zw_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_32_64 = mul <16 x i64> %zl1_32_64, %zl2_32_64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extmulv16'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asw_8_16 = mul <16 x i16> %i16, %sw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %asl_8_16 = mul <16 x i16> %sl1_8_16, %sl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azw_8_16 = mul <16 x i16> %i16, %zw_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %azl_8_16 = mul <16 x i16> %zl1_8_16, %zl2_8_16
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_8_32 = mul <16 x i32> %i32, %sw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_8_32 = mul <16 x i32> %sl1_8_32, %sl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_8_32 = mul <16 x i32> %i32, %zw_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_8_32 = mul <16 x i32> %zl1_8_32, %zl2_8_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asw_8_64 = mul <16 x i64> %i64, %sw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asl_8_64 = mul <16 x i64> %sl1_8_64, %sl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azw_8_64 = mul <16 x i64> %i64, %zw_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azl_8_64 = mul <16 x i64> %zl1_8_64, %zl2_8_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asw_16_32 = mul <16 x i32> %i32, %sw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %asl_16_32 = mul <16 x i32> %sl1_16_32, %sl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azw_16_32 = mul <16 x i32> %i32, %zw_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %azl_16_32 = mul <16 x i32> %zl1_16_32, %zl2_16_32
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asw_16_64 = mul <16 x i64> %i64, %sw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asl_16_64 = mul <16 x i64> %sl1_16_64, %sl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azw_16_64 = mul <16 x i64> %i64, %zw_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azl_16_64 = mul <16 x i64> %zl1_16_64, %zl2_16_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %asw_32_64 = mul <16 x i64> %i64, %sw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %asl_32_64 = mul <16 x i64> %sl1_32_64, %sl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %azw_32_64 = mul <16 x i64> %i64, %zw_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %azl_32_64 = mul <16 x i64> %zl1_32_64, %zl2_32_64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extmulv16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_16 = mul <16 x i16> %i16, %sw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_8_16 = sext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <16 x i16> %sl1_8_16, %sl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_16 = mul <16 x i16> %i16, %zw_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_8_16 = zext <16 x i8> %i8 to <16 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <16 x i16> %zl1_8_16, %zl2_8_16
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_32 = mul <16 x i32> %i32, %sw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_32 = sext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_32 = mul <16 x i32> %sl1_8_32, %sl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_32 = mul <16 x i32> %i32, %zw_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_32 = zext <16 x i8> %i8 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_32 = mul <16 x i32> %zl1_8_32, %zl2_8_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_8_64 = mul <16 x i64> %i64, %sw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_8_64 = sext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_64 = mul <16 x i64> %sl1_8_64, %sl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_8_64 = mul <16 x i64> %i64, %zw_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_8_64 = zext <16 x i8> %i8 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_64 = mul <16 x i64> %zl1_8_64, %zl2_8_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_32 = mul <16 x i32> %i32, %sw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_16_32 = sext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_32 = mul <16 x i32> %sl1_16_32, %sl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_32 = mul <16 x i32> %i32, %zw_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_16_32 = zext <16 x i16> %i16 to <16 x i32>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_32 = mul <16 x i32> %zl1_16_32, %zl2_16_32
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_16_64 = mul <16 x i64> %i64, %sw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl2_16_64 = sext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_16_64 = mul <16 x i64> %sl1_16_64, %sl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_16_64 = mul <16 x i64> %i64, %zw_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl1_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zl2_16_64 = zext <16 x i16> %i16 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_16_64 = mul <16 x i64> %zl1_16_64, %zl2_16_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sw_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asw_32_64 = mul <16 x i64> %i64, %sw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl1_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sl2_32_64 = sext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_32_64 = mul <16 x i64> %sl1_32_64, %sl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zw_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azw_32_64 = mul <16 x i64> %i64, %zw_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl2_32_64 = zext <16 x i32> %i32 to <16 x i64>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_32_64 = mul <16 x i64> %zl1_32_64, %zl2_32_64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sw_8_16 = sext <16 x i8> %i8 to <16 x i16>
   %asw_8_16 = mul <16 x i16> %i16, %sw_8_16
@@ -2089,15 +3035,25 @@ define void @extmulv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6
 }
 
 define void @extmul_const(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i64)  {
-; CHECK-LABEL: 'extmul_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, splat (i16 10)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, splat (i16 10)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16b = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, splat (i16 255)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %aal_8_16 = mul <8 x i16> %zl1_8_16b, %and
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'extmul_const'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, splat (i16 10)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, splat (i16 10)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16b = zext <8 x i8> %i8 to <8 x i16>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, splat (i16 255)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %aal_8_16 = mul <8 x i16> %zl1_8_16b, %and
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'extmul_const'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, splat (i16 10)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, splat (i16 10)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16b = zext <8 x i8> %i8 to <8 x i16>
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, splat (i16 255)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %aal_8_16 = mul <8 x i16> %zl1_8_16b, %and
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
   %asl_8_16 = mul <8 x i16> %sl1_8_16, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>

diff  --git a/llvm/test/Analysis/CostModel/AArch64/arith.ll b/llvm/test/Analysis/CostModel/AArch64/arith.ll
index 65337ca8f196d..2355a612809c9 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith.ll
@@ -1,20 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefix=RECIP
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefix=SIZE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @i1() {
-; CHECK-LABEL: 'i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'i1'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i1 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i1 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i1 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i1 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i1 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i1 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'i1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i1 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i1 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i1 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i1 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i1 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i1 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i1 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i1 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i1 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c = add i1 undef, undef
   %d = sub i1 undef, undef
@@ -29,17 +42,29 @@ define void @i1() {
 }
 
 define void @i8() {
-; CHECK-LABEL: 'i8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'i8'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i8 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i8 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i8 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i8 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i8 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'i8'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i8 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i8 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i8 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i8 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i8 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c = add i8 undef, undef
   %d = sub i8 undef, undef
@@ -54,17 +79,29 @@ define void @i8() {
 }
 
 define void @i16() {
-; CHECK-LABEL: 'i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'i16'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i16 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i16 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i16 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i16 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i16 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'i16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i16 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i16 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i16 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i16 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i16 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c = add i16 undef, undef
   %d = sub i16 undef, undef
@@ -79,17 +116,29 @@ define void @i16() {
 }
 
 define void @i32() {
-; CHECK-LABEL: 'i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'i32'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i32 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i32 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i32 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i32 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i32 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'i32'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i32 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i32 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i32 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i32 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i32 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c = add i32 undef, undef
   %d = sub i32 undef, undef
@@ -104,17 +153,29 @@ define void @i32() {
 }
 
 define void @i64() {
-; CHECK-LABEL: 'i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'i64'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i64 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i64 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i64 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i64 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i64 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i64 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i64 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i64 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i64 undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'i64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = add i64 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = sub i64 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = mul i64 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = ashr i64 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = lshr i64 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = shl i64 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i = and i64 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j = or i64 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k = xor i64 undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c = add i64 undef, undef
   %d = sub i64 undef, undef
@@ -130,44 +191,83 @@ define void @i64() {
 
 
 define void @vi8() {
-; CHECK-LABEL: 'vi8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'vi8'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vi8'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c2 = add <2 x i8> undef, undef
   %d2 = sub <2 x i8> undef, undef
@@ -209,44 +309,83 @@ define void @vi8() {
 }
 
 define void @vi16() {
-; CHECK-LABEL: 'vi16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c16 = add <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d16 = sub <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e16 = mul <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = ashr <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g16 = lshr <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %h16 = shl <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'vi16'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c16 = add <16 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d16 = sub <16 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e16 = mul <16 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = ashr <16 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g16 = lshr <16 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %h16 = shl <16 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vi16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i16> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c2 = add <2 x i16> undef, undef
   %d2 = sub <2 x i16> undef, undef
@@ -288,44 +427,83 @@ define void @vi16() {
 }
 
 define void @vi32() {
-; CHECK-LABEL: 'vi32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'vi32'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c8 = add <8 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d8 = sub <8 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e8 = mul <8 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f8 = ashr <8 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g8 = lshr <8 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %h8 = shl <8 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = and <8 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j8 = or <8 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k8 = xor <8 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %c16 = add <16 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %d16 = sub <16 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e16 = mul <16 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = ashr <16 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %g16 = lshr <16 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %h16 = shl <16 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vi32'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i32> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c2 = add <2 x i32> undef, undef
   %d2 = sub <2 x i32> undef, undef
@@ -367,44 +545,83 @@ define void @vi32() {
 }
 
 define void @vi64() {
-; CHECK-LABEL: 'vi64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %e2 = mul <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c4 = add <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d4 = sub <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %e4 = mul <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %c8 = add <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %d8 = sub <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %e8 = mul <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f8 = ashr <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %g8 = lshr <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %h8 = shl <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c16 = add <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %d16 = sub <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %e16 = mul <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f16 = ashr <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %g16 = lshr <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %h16 = shl <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'vi64'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %e2 = mul <2 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %c4 = add <4 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %d4 = sub <4 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %e4 = mul <4 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f4 = ashr <4 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %g4 = lshr <4 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %h4 = shl <4 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %c8 = add <8 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %d8 = sub <8 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %e8 = mul <8 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f8 = ashr <8 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %g8 = lshr <8 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %h8 = shl <8 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c16 = add <16 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %d16 = sub <16 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %e16 = mul <16 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f16 = ashr <16 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %g16 = lshr <16 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %h16 = shl <16 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'vi64'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d2 = sub <2 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e2 = mul <2 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f2 = ashr <2 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g2 = lshr <2 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h2 = shl <2 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c4 = add <4 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d4 = sub <4 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e4 = mul <4 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f4 = ashr <4 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g4 = lshr <4 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h4 = shl <4 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i4 = and <4 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j4 = or <4 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k4 = xor <4 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c8 = add <8 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d8 = sub <8 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e8 = mul <8 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f8 = ashr <8 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g8 = lshr <8 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h8 = shl <8 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and <8 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j8 = or <8 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k8 = xor <8 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c16 = add <16 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d16 = sub <16 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e16 = mul <16 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = ashr <16 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g16 = lshr <16 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h16 = shl <16 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i64> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %c2 = add <2 x i64> undef, undef
   %d2 = sub <2 x i64> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AArch64/bitreverse.ll b/llvm/test/Analysis/CostModel/AArch64/bitreverse.ll
index 35133c4677e4c..44e14bc78424d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/bitreverse.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/bitreverse.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
 
 ; Verify the cost of scalar bitreverse instructions.
 

diff  --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index 1ddc995366bcc..8c6466ab470b4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -1,12 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
-; RUN: opt -passes="print<cost-model>" -disable-output -mtriple=arm64-apple-ios < %s 2>&1 | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=throughput -disable-output -mtriple=arm64-apple-ios < %s 2>&1 | FileCheck %s --check-prefix=RECIP
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size -disable-output -mtriple=arm64-apple-ios < %s 2>&1 | FileCheck %s --check-prefix=SIZE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define i8 @fshl_i8_3rd_arg_const(i8 %a, i8 %b) {
-; CHECK-LABEL: 'fshl_i8_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl
+; RECIP-LABEL: 'fshl_i8_3rd_arg_const'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl
+;
+; SIZE-LABEL: 'fshl_i8_3rd_arg_const'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %fshl
 ;
 entry:
   %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
@@ -14,9 +19,13 @@ entry:
 }
 
 define i8 @fshl_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
-; CHECK-LABEL: 'fshl_i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl
+; RECIP-LABEL: 'fshl_i8_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl
+;
+; SIZE-LABEL: 'fshl_i8_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %fshl
 ;
 entry:
   %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
@@ -26,9 +35,13 @@ entry:
 declare i8 @llvm.fshl.i8(i8, i8, i8)
 
 define i16 @fshl_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: 'fshl_i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %fshl
+; RECIP-LABEL: 'fshl_i16'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %fshl
+;
+; SIZE-LABEL: 'fshl_i16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %fshl
 ;
 entry:
   %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
@@ -38,9 +51,13 @@ entry:
 declare i16 @llvm.fshl.i16(i16, i16, i16)
 
 define i32 @fshl_i32_3rd_arg_const(i32 %a, i32 %b) {
-; CHECK-LABEL: 'fshl_i32_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl
+; RECIP-LABEL: 'fshl_i32_3rd_arg_const'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl
+;
+; SIZE-LABEL: 'fshl_i32_3rd_arg_const'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %fshl
 ;
 entry:
   %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
@@ -48,9 +65,13 @@ entry:
 }
 
 define i32 @fshl_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
-; CHECK-LABEL: 'fshl_i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl
+; RECIP-LABEL: 'fshl_i32_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl
+;
+; SIZE-LABEL: 'fshl_i32_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %fshl
 ;
 entry:
   %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
@@ -60,9 +81,13 @@ entry:
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 
 define i64 @fshl_i64_3rd_arg_const(i64 %a, i64 %b) {
-; CHECK-LABEL: 'fshl_i64_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl
+; RECIP-LABEL: 'fshl_i64_3rd_arg_const'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl
+;
+; SIZE-LABEL: 'fshl_i64_3rd_arg_const'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %fshl
 ;
 entry:
   %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
@@ -70,9 +95,13 @@ entry:
 }
 
 define i64 @fshl_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
-; CHECK-LABEL: 'fshl_i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl
+; RECIP-LABEL: 'fshl_i64_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl
+;
+; SIZE-LABEL: 'fshl_i64_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %fshl
 ;
 entry:
   %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
@@ -82,9 +111,13 @@ entry:
 declare i64 @llvm.fshl.i64(i64, i64, i64)
 
 define i19 @fshl_i19(i19 %a, i19 %b) {
-; CHECK-LABEL: 'fshl_i19'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i19 %fshl
+; RECIP-LABEL: 'fshl_i19'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i19 %fshl
+;
+; SIZE-LABEL: 'fshl_i19'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i19 %fshl
 ;
 entry:
   %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
@@ -95,9 +128,13 @@ declare i19 @llvm.fshl.i19(i19, i19, i19)
 
 
 define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
+; RECIP-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
+;
+; SIZE-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshl
 ;
 entry:
   %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
@@ -105,9 +142,13 @@ entry:
 }
 
 define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_lanes_
diff erent(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
+; RECIP-LABEL: 'fshl_v16i8_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
+;
+; SIZE-LABEL: 'fshl_v16i8_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshl
 ;
 entry:
   %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
@@ -115,9 +156,13 @@ entry:
 }
 
 define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
+; RECIP-LABEL: 'fshl_v16i8_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
+;
+; SIZE-LABEL: 'fshl_v16i8_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshl
 ;
 entry:
   %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
@@ -127,9 +172,13 @@ entry:
 declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
 
 define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
+; RECIP-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
+;
+; SIZE-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshl
 ;
 entry:
   %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
@@ -137,9 +186,13 @@ entry:
 }
 
 define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_lanes_
diff erent(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
+; RECIP-LABEL: 'fshl_v8i16_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
+;
+; SIZE-LABEL: 'fshl_v8i16_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshl
 ;
 entry:
   %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
@@ -147,9 +200,13 @@ entry:
 }
 
 define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
+; RECIP-LABEL: 'fshl_v8i16_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
+;
+; SIZE-LABEL: 'fshl_v8i16_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshl
 ;
 entry:
   %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
@@ -159,9 +216,13 @@ entry:
 declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
 
 define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
+; RECIP-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
+;
+; SIZE-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshl
 ;
 entry:
   %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
@@ -169,9 +230,13 @@ entry:
 }
 
 define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_lanes_
diff erent(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
+; RECIP-LABEL: 'fshl_v4i32_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
+;
+; SIZE-LABEL: 'fshl_v4i32_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshl
 ;
 entry:
   %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
@@ -179,9 +244,13 @@ entry:
 }
 
 define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
+; RECIP-LABEL: 'fshl_v4i32_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
+;
+; SIZE-LABEL: 'fshl_v4i32_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshl
 ;
 entry:
   %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
@@ -191,9 +260,13 @@ entry:
 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
+; RECIP-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
+;
+; SIZE-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshl
 ;
 entry:
   %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
@@ -201,9 +274,13 @@ entry:
 }
 
 define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_lanes_
diff erent(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
+; RECIP-LABEL: 'fshl_v2i64_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
+;
+; SIZE-LABEL: 'fshl_v2i64_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshl
 ;
 entry:
   %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
@@ -211,9 +288,13 @@ entry:
 }
 
 define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
-; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
+; RECIP-LABEL: 'fshl_v2i64_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
+;
+; SIZE-LABEL: 'fshl_v2i64_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshl
 ;
 entry:
   %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
@@ -223,9 +304,13 @@ entry:
 declare <2 x i64> @llvm.fshl.v4i64(<2 x i64>, <2 x i64>, <2 x i64>)
 
 define <4 x i30> @fshl_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) {
-; CHECK-LABEL: 'fshl_v4i30_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i30> %fshl
+; RECIP-LABEL: 'fshl_v4i30_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i30> %fshl
+;
+; SIZE-LABEL: 'fshl_v4i30_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i30> %fshl
 ;
 entry:
   %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
@@ -235,9 +320,13 @@ entry:
 declare <4 x i30> @llvm.fshl.v4i30(<4 x i30>, <4 x i30>, <4 x i30>)
 
 define <2 x i66> @fshl_v2i66_3rd_arg_vec_const_lanes_
diff erent(<2 x i66> %a, <2 x i66> %b) {
-; CHECK-LABEL: 'fshl_v2i66_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i66> %fshl
+; RECIP-LABEL: 'fshl_v2i66_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i66> %fshl
+;
+; SIZE-LABEL: 'fshl_v2i66_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i66> %fshl
 ;
 entry:
   %fshl = tail call <2 x i66> @llvm.fshl.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
@@ -246,9 +335,13 @@ entry:
 declare <2 x i66> @llvm.fshl.v4i66(<2 x i66>, <2 x i66>, <2 x i66>)
 
 define i66 @fshl_i66(i66 %a, i66 %b) {
-; CHECK-LABEL: 'fshl_i66'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i66 %fshl
+; RECIP-LABEL: 'fshl_i66'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i66 %fshl
+;
+; SIZE-LABEL: 'fshl_i66'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i66 %fshl
 ;
 entry:
   %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
@@ -258,9 +351,13 @@ entry:
 declare i66 @llvm.fshl.i66(i66, i66, i66)
 
 define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_lanes_
diff erent(<2 x i128> %a, <2 x i128> %b) {
-; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %fshl
+; RECIP-LABEL: 'fshl_v2i128_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %fshl
+;
+; SIZE-LABEL: 'fshl_v2i128_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i128> %fshl
 ;
 entry:
   %fshl = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
@@ -269,9 +366,13 @@ entry:
 declare <2 x i128> @llvm.fshl.v4i128(<2 x i128>, <2 x i128>, <2 x i128>)
 
 define i128 @fshl_i128(i128 %a, i128 %b) {
-; CHECK-LABEL: 'fshl_i128'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i128 %fshl
+; RECIP-LABEL: 'fshl_i128'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i128 %fshl
+;
+; SIZE-LABEL: 'fshl_i128'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i128 %fshl
 ;
 entry:
   %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index 438690f76f019..120b1c4c4c4ef 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -1,12 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
-; RUN: opt -passes="print<cost-model>" -disable-output -mtriple=arm64-apple-ios < %s 2>&1 | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=throughput -disable-output -mtriple=arm64-apple-ios < %s 2>&1 | FileCheck %s --check-prefix=RECIP
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size -disable-output -mtriple=arm64-apple-ios < %s 2>&1 | FileCheck %s --check-prefix=SIZE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define i8 @fshr_i8_3rd_arg_const(i8 %a, i8 %b) {
-; CHECK-LABEL: 'fshr_i8_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr
+; RECIP-LABEL: 'fshr_i8_3rd_arg_const'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr
+;
+; SIZE-LABEL: 'fshr_i8_3rd_arg_const'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %fshr
 ;
 entry:
   %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
@@ -14,9 +19,13 @@ entry:
 }
 
 define i8 @fshr_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
-; CHECK-LABEL: 'fshr_i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr
+; RECIP-LABEL: 'fshr_i8_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr
+;
+; SIZE-LABEL: 'fshr_i8_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %fshr
 ;
 entry:
   %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
@@ -26,9 +35,13 @@ entry:
 declare i8 @llvm.fshr.i8(i8, i8, i8)
 
 define i16 @fshr_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: 'fshr_i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %fshr
+; RECIP-LABEL: 'fshr_i16'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %fshr
+;
+; SIZE-LABEL: 'fshr_i16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %fshr
 ;
 entry:
   %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
@@ -38,9 +51,13 @@ entry:
 declare i16 @llvm.fshr.i16(i16, i16, i16)
 
 define i32 @fshr_i32_3rd_arg_const(i32 %a, i32 %b) {
-; CHECK-LABEL: 'fshr_i32_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr
+; RECIP-LABEL: 'fshr_i32_3rd_arg_const'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr
+;
+; SIZE-LABEL: 'fshr_i32_3rd_arg_const'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %fshr
 ;
 entry:
   %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
@@ -48,9 +65,13 @@ entry:
 }
 
 define i32 @fshr_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
-; CHECK-LABEL: 'fshr_i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr
+; RECIP-LABEL: 'fshr_i32_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr
+;
+; SIZE-LABEL: 'fshr_i32_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %fshr
 ;
 entry:
   %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
@@ -60,9 +81,13 @@ entry:
 declare i32 @llvm.fshr.i32(i32, i32, i32)
 
 define i64 @fshr_i64_3rd_arg_const(i64 %a, i64 %b) {
-; CHECK-LABEL: 'fshr_i64_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr
+; RECIP-LABEL: 'fshr_i64_3rd_arg_const'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr
+;
+; SIZE-LABEL: 'fshr_i64_3rd_arg_const'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %fshr
 ;
 entry:
   %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
@@ -70,9 +95,13 @@ entry:
 }
 
 define i64 @fshr_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
-; CHECK-LABEL: 'fshr_i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr
+; RECIP-LABEL: 'fshr_i64_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr
+;
+; SIZE-LABEL: 'fshr_i64_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %fshr
 ;
 entry:
   %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
@@ -82,9 +111,13 @@ entry:
 declare i64 @llvm.fshr.i64(i64, i64, i64)
 
 define i19 @fshr_i19(i19 %a, i19 %b) {
-; CHECK-LABEL: 'fshr_i19'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i19 %fshr
+; RECIP-LABEL: 'fshr_i19'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i19 %fshr
+;
+; SIZE-LABEL: 'fshr_i19'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i19 %fshr
 ;
 entry:
   %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
@@ -95,9 +128,13 @@ declare i19 @llvm.fshr.i19(i19, i19, i19)
 
 
 define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
+; RECIP-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
+;
+; SIZE-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshr
 ;
 entry:
   %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
@@ -105,9 +142,13 @@ entry:
 }
 
 define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_lanes_
diff erent(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
+; RECIP-LABEL: 'fshr_v16i8_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
+;
+; SIZE-LABEL: 'fshr_v16i8_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshr
 ;
 entry:
   %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
@@ -115,9 +156,13 @@ entry:
 }
 
 define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
+; RECIP-LABEL: 'fshr_v16i8_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
+;
+; SIZE-LABEL: 'fshr_v16i8_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshr
 ;
 entry:
   %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
@@ -127,9 +172,13 @@ entry:
 declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
 
 define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
+; RECIP-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
+;
+; SIZE-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshr
 ;
 entry:
   %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
@@ -137,9 +186,13 @@ entry:
 }
 
 define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_lanes_
diff erent(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
+; RECIP-LABEL: 'fshr_v8i16_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
+;
+; SIZE-LABEL: 'fshr_v8i16_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshr
 ;
 entry:
   %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
@@ -147,9 +200,13 @@ entry:
 }
 
 define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
+; RECIP-LABEL: 'fshr_v8i16_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
+;
+; SIZE-LABEL: 'fshr_v8i16_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshr
 ;
 entry:
   %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
@@ -159,9 +216,13 @@ entry:
 declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
 
 define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
+; RECIP-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
+;
+; SIZE-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshr
 ;
 entry:
   %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
@@ -169,9 +230,13 @@ entry:
 }
 
 define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_lanes_
diff erent(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
+; RECIP-LABEL: 'fshr_v4i32_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
+;
+; SIZE-LABEL: 'fshr_v4i32_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshr
 ;
 entry:
   %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
@@ -179,9 +244,13 @@ entry:
 }
 
 define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
+; RECIP-LABEL: 'fshr_v4i32_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
+;
+; SIZE-LABEL: 'fshr_v4i32_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshr
 ;
 entry:
   %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
@@ -191,9 +260,13 @@ entry:
 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
+; RECIP-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
+;
+; SIZE-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshr
 ;
 entry:
   %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
@@ -201,9 +274,13 @@ entry:
 }
 
 define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_lanes_
diff erent(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
+; RECIP-LABEL: 'fshr_v2i64_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
+;
+; SIZE-LABEL: 'fshr_v2i64_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshr
 ;
 entry:
   %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
@@ -211,9 +288,13 @@ entry:
 }
 
 define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
-; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
+; RECIP-LABEL: 'fshr_v2i64_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
+;
+; SIZE-LABEL: 'fshr_v2i64_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshr
 ;
 entry:
   %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
@@ -223,9 +304,13 @@ entry:
 declare <2 x i64> @llvm.fshr.v4i64(<2 x i64>, <2 x i64>, <2 x i64>)
 
 define <4 x i30> @fshr_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) {
-; CHECK-LABEL: 'fshr_v4i30_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i30> %fshr
+; RECIP-LABEL: 'fshr_v4i30_3rd_arg_var'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i30> %fshr
+;
+; SIZE-LABEL: 'fshr_v4i30_3rd_arg_var'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i30> %fshr
 ;
 entry:
   %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
@@ -235,9 +320,13 @@ entry:
 declare <4 x i30> @llvm.fshr.v4i30(<4 x i30>, <4 x i30>, <4 x i30>)
 
 define <2 x i66> @fshr_v2i66_3rd_arg_vec_const_lanes_
diff erent(<2 x i66> %a, <2 x i66> %b) {
-; CHECK-LABEL: 'fshr_v2i66_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i66> %fshr
+; RECIP-LABEL: 'fshr_v2i66_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i66> %fshr
+;
+; SIZE-LABEL: 'fshr_v2i66_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i66> %fshr
 ;
 entry:
   %fshr = tail call <2 x i66> @llvm.fshr.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
@@ -246,9 +335,13 @@ entry:
 declare <2 x i66> @llvm.fshr.v4i66(<2 x i66>, <2 x i66>, <2 x i66>)
 
 define i66 @fshr_i66(i66 %a, i66 %b) {
-; CHECK-LABEL: 'fshr_i66'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i66 %fshr
+; RECIP-LABEL: 'fshr_i66'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i66 %fshr
+;
+; SIZE-LABEL: 'fshr_i66'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i66 %fshr
 ;
 entry:
   %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
@@ -258,9 +351,13 @@ entry:
 declare i66 @llvm.fshr.i66(i66, i66, i66)
 
 define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_lanes_
diff erent(<2 x i128> %a, <2 x i128> %b) {
-; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_lanes_
diff erent'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %fshr
+; RECIP-LABEL: 'fshr_v2i128_3rd_arg_vec_const_lanes_
diff erent'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %fshr
+;
+; SIZE-LABEL: 'fshr_v2i128_3rd_arg_vec_const_lanes_
diff erent'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i128> %fshr
 ;
 entry:
   %fshr = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
@@ -269,9 +366,13 @@ entry:
 declare <2 x i128> @llvm.fshr.v4i128(<2 x i128>, <2 x i128>, <2 x i128>)
 
 define i128 @fshr_i128(i128 %a, i128 %b) {
-; CHECK-LABEL: 'fshr_i128'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i128 %fshr
+; RECIP-LABEL: 'fshr_i128'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i128 %fshr
+;
+; SIZE-LABEL: 'fshr_i128'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i128 %fshr
 ;
 entry:
   %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/gep.ll b/llvm/test/Analysis/CostModel/AArch64/gep.ll
index 34c53cba173b8..979d105670943 100644
--- a/llvm/test/Analysis/CostModel/AArch64/gep.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/gep.ll
@@ -1,14 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64--linux-gnu < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefix=RECIP
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefix=SIZE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64--linux-gnu"
+target triple = "aarch64-linux-gnu"
 
 define i8 @test1(ptr %p) {
-; CHECK-LABEL: 'test1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+; RECIP-LABEL: 'test1'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
+; SIZE-LABEL: 'test1'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %v
 ;
   %a = getelementptr inbounds i8, ptr %p, i32 1
   %v = load i8, ptr %a
@@ -16,10 +22,15 @@ define i8 @test1(ptr %p) {
 }
 
 define i16 @test2(ptr %p) {
-; CHECK-LABEL: 'test2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+; RECIP-LABEL: 'test2'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
+; SIZE-LABEL: 'test2'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %v
 ;
   %a = getelementptr inbounds i16, ptr %p, i32 1
   %v = load i16, ptr %a
@@ -27,10 +38,15 @@ define i16 @test2(ptr %p) {
 }
 
 define i32 @test3(ptr %p) {
-; CHECK-LABEL: 'test3'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+; RECIP-LABEL: 'test3'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
+; SIZE-LABEL: 'test3'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %v
 ;
   %a = getelementptr inbounds i32, ptr %p, i32 1
   %v = load i32, ptr %a
@@ -38,10 +54,15 @@ define i32 @test3(ptr %p) {
 }
 
 define i64 @test4(ptr %p) {
-; CHECK-LABEL: 'test4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+; RECIP-LABEL: 'test4'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
+; SIZE-LABEL: 'test4'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %v
 ;
   %a = getelementptr inbounds i64, ptr %p, i32 1
   %v = load i64, ptr %a
@@ -49,10 +70,15 @@ define i64 @test4(ptr %p) {
 }
 
 define i8 @test5(ptr %p) {
-; CHECK-LABEL: 'test5'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 1024
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+; RECIP-LABEL: 'test5'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 1024
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
+; SIZE-LABEL: 'test5'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 1024
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %v
 ;
   %a = getelementptr inbounds i8, ptr %p, i32 1024
   %v = load i8, ptr %a
@@ -60,10 +86,15 @@ define i8 @test5(ptr %p) {
 }
 
 define i16 @test6(ptr %p) {
-; CHECK-LABEL: 'test6'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 1024
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+; RECIP-LABEL: 'test6'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 1024
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
+; SIZE-LABEL: 'test6'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 1024
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %v
 ;
   %a = getelementptr inbounds i16, ptr %p, i32 1024
   %v = load i16, ptr %a
@@ -71,10 +102,15 @@ define i16 @test6(ptr %p) {
 }
 
 define i32 @test7(ptr %p) {
-; CHECK-LABEL: 'test7'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 1024
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+; RECIP-LABEL: 'test7'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 1024
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
+; SIZE-LABEL: 'test7'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 1024
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %v
 ;
   %a = getelementptr inbounds i32, ptr %p, i32 1024
   %v = load i32, ptr %a
@@ -82,10 +118,15 @@ define i32 @test7(ptr %p) {
 }
 
 define i64 @test8(ptr %p) {
-; CHECK-LABEL: 'test8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 1024
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+; RECIP-LABEL: 'test8'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 1024
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
+; SIZE-LABEL: 'test8'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 1024
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %v
 ;
   %a = getelementptr inbounds i64, ptr %p, i32 1024
   %v = load i64, ptr %a
@@ -93,10 +134,15 @@ define i64 @test8(ptr %p) {
 }
 
 define i8 @test9(ptr %p) {
-; CHECK-LABEL: 'test9'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 4096
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+; RECIP-LABEL: 'test9'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 4096
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
+; SIZE-LABEL: 'test9'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 4096
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %v
 ;
   %a = getelementptr inbounds i8, ptr %p, i32 4096
   %v = load i8, ptr %a
@@ -104,10 +150,15 @@ define i8 @test9(ptr %p) {
 }
 
 define i16 @test10(ptr %p) {
-; CHECK-LABEL: 'test10'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 4096
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+; RECIP-LABEL: 'test10'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 4096
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
+; SIZE-LABEL: 'test10'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 4096
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %v
 ;
   %a = getelementptr inbounds i16, ptr %p, i32 4096
   %v = load i16, ptr %a
@@ -115,10 +166,15 @@ define i16 @test10(ptr %p) {
 }
 
 define i32 @test11(ptr %p) {
-; CHECK-LABEL: 'test11'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 4096
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+; RECIP-LABEL: 'test11'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 4096
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
+; SIZE-LABEL: 'test11'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 4096
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %v
 ;
   %a = getelementptr inbounds i32, ptr %p, i32 4096
   %v = load i32, ptr %a
@@ -126,10 +182,15 @@ define i32 @test11(ptr %p) {
 }
 
 define i64 @test12(ptr %p) {
-; CHECK-LABEL: 'test12'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 4096
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+; RECIP-LABEL: 'test12'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 4096
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
+; SIZE-LABEL: 'test12'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 4096
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %v
 ;
   %a = getelementptr inbounds i64, ptr %p, i32 4096
   %v = load i64, ptr %a
@@ -137,10 +198,15 @@ define i64 @test12(ptr %p) {
 }
 
 define i8 @test13(ptr %p) {
-; CHECK-LABEL: 'test13'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+; RECIP-LABEL: 'test13'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
+; SIZE-LABEL: 'test13'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %v
 ;
   %a = getelementptr inbounds i8, ptr %p, i32 -64
   %v = load i8, ptr %a
@@ -148,10 +214,15 @@ define i8 @test13(ptr %p) {
 }
 
 define i16 @test14(ptr %p) {
-; CHECK-LABEL: 'test14'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+; RECIP-LABEL: 'test14'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
+; SIZE-LABEL: 'test14'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %v
 ;
   %a = getelementptr inbounds i16, ptr %p, i32 -64
   %v = load i16, ptr %a
@@ -159,10 +230,15 @@ define i16 @test14(ptr %p) {
 }
 
 define i32 @test15(ptr %p) {
-; CHECK-LABEL: 'test15'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+; RECIP-LABEL: 'test15'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
+; SIZE-LABEL: 'test15'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %v
 ;
   %a = getelementptr inbounds i32, ptr %p, i32 -64
   %v = load i32, ptr %a
@@ -170,10 +246,15 @@ define i32 @test15(ptr %p) {
 }
 
 define i64 @test16(ptr %p) {
-; CHECK-LABEL: 'test16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+; RECIP-LABEL: 'test16'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -64
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
+; SIZE-LABEL: 'test16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -64
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %v
 ;
   %a = getelementptr inbounds i64, ptr %p, i32 -64
   %v = load i64, ptr %a
@@ -181,10 +262,15 @@ define i64 @test16(ptr %p) {
 }
 
 define i8 @test17(ptr %p) {
-; CHECK-LABEL: 'test17'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -1024
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+; RECIP-LABEL: 'test17'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -1024
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
+; SIZE-LABEL: 'test17'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -1024
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %v
 ;
   %a = getelementptr inbounds i8, ptr %p, i32 -1024
   %v = load i8, ptr %a
@@ -192,10 +278,15 @@ define i8 @test17(ptr %p) {
 }
 
 define i16 @test18(ptr %p) {
-; CHECK-LABEL: 'test18'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -1024
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+; RECIP-LABEL: 'test18'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -1024
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
+; SIZE-LABEL: 'test18'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -1024
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %v
 ;
   %a = getelementptr inbounds i16, ptr %p, i32 -1024
   %v = load i16, ptr %a
@@ -203,10 +294,15 @@ define i16 @test18(ptr %p) {
 }
 
 define i32 @test19(ptr %p) {
-; CHECK-LABEL: 'test19'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -1024
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+; RECIP-LABEL: 'test19'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -1024
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
+; SIZE-LABEL: 'test19'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -1024
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %v
 ;
   %a = getelementptr inbounds i32, ptr %p, i32 -1024
   %v = load i32, ptr %a
@@ -214,10 +310,15 @@ define i32 @test19(ptr %p) {
 }
 
 define i64 @test20(ptr %p) {
-; CHECK-LABEL: 'test20'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -1024
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+; RECIP-LABEL: 'test20'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -1024
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
+; SIZE-LABEL: 'test20'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -1024
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %v
 ;
   %a = getelementptr inbounds i64, ptr %p, i32 -1024
   %v = load i64, ptr %a
@@ -225,10 +326,15 @@ define i64 @test20(ptr %p) {
 }
 
 define i8 @test21(ptr %p, i32 %i) {
-; CHECK-LABEL: 'test21'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 %i
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+; RECIP-LABEL: 'test21'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 %i
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
+; SIZE-LABEL: 'test21'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 %i
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %v
 ;
   %a = getelementptr inbounds i8, ptr %p, i32 %i
   %v = load i8, ptr %a
@@ -236,10 +342,15 @@ define i8 @test21(ptr %p, i32 %i) {
 }
 
 define i16 @test22(ptr %p, i32 %i) {
-; CHECK-LABEL: 'test22'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 %i
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+; RECIP-LABEL: 'test22'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 %i
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
+; SIZE-LABEL: 'test22'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 %i
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %v
 ;
   %a = getelementptr inbounds i16, ptr %p, i32 %i
   %v = load i16, ptr %a
@@ -247,10 +358,15 @@ define i16 @test22(ptr %p, i32 %i) {
 }
 
 define i32 @test23(ptr %p, i32 %i) {
-; CHECK-LABEL: 'test23'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 %i
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+; RECIP-LABEL: 'test23'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 %i
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
+; SIZE-LABEL: 'test23'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 %i
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %v
 ;
   %a = getelementptr inbounds i32, ptr %p, i32 %i
   %v = load i32, ptr %a
@@ -258,10 +374,15 @@ define i32 @test23(ptr %p, i32 %i) {
 }
 
 define i64 @test24(ptr %p, i32 %i) {
-; CHECK-LABEL: 'test24'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 %i
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+; RECIP-LABEL: 'test24'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 %i
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
+; SIZE-LABEL: 'test24'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 %i
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %v
 ;
   %a = getelementptr inbounds i64, ptr %p, i32 %i
   %v = load i64, ptr %a
@@ -269,10 +390,15 @@ define i64 @test24(ptr %p, i32 %i) {
 }
 
 define i8 @test25(ptr %p) {
-; CHECK-LABEL: 'test25'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+; RECIP-LABEL: 'test25'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -128
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
+; SIZE-LABEL: 'test25'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -128
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %v
 ;
   %a = getelementptr inbounds i8, ptr %p, i32 -128
   %v = load i8, ptr %a
@@ -280,10 +406,15 @@ define i8 @test25(ptr %p) {
 }
 
 define i16 @test26(ptr %p) {
-; CHECK-LABEL: 'test26'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+; RECIP-LABEL: 'test26'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -128
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
+; SIZE-LABEL: 'test26'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -128
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %v
 ;
   %a = getelementptr inbounds i16, ptr %p, i32 -128
   %v = load i16, ptr %a
@@ -291,10 +422,15 @@ define i16 @test26(ptr %p) {
 }
 
 define i32 @test27(ptr %p) {
-; CHECK-LABEL: 'test27'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+; RECIP-LABEL: 'test27'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -128
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
+; SIZE-LABEL: 'test27'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -128
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %v
 ;
   %a = getelementptr inbounds i32, ptr %p, i32 -128
   %v = load i32, ptr %a
@@ -302,10 +438,15 @@ define i32 @test27(ptr %p) {
 }
 
 define i64 @test28(ptr %p) {
-; CHECK-LABEL: 'test28'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+; RECIP-LABEL: 'test28'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -128
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
+; SIZE-LABEL: 'test28'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -128
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %v
 ;
   %a = getelementptr inbounds i64, ptr %p, i32 -128
   %v = load i64, ptr %a
@@ -313,10 +454,15 @@ define i64 @test28(ptr %p) {
 }
 
 define i8 @test29(ptr %p) {
-; CHECK-LABEL: 'test29'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+; RECIP-LABEL: 'test29'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -256
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
+; SIZE-LABEL: 'test29'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -256
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %v
 ;
   %a = getelementptr inbounds i8, ptr %p, i32 -256
   %v = load i8, ptr %a
@@ -324,10 +470,15 @@ define i8 @test29(ptr %p) {
 }
 
 define i16 @test30(ptr %p) {
-; CHECK-LABEL: 'test30'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+; RECIP-LABEL: 'test30'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -256
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
+; SIZE-LABEL: 'test30'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -256
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %v
 ;
   %a = getelementptr inbounds i16, ptr %p, i32 -256
   %v = load i16, ptr %a
@@ -335,10 +486,15 @@ define i16 @test30(ptr %p) {
 }
 
 define i32 @test31(ptr %p) {
-; CHECK-LABEL: 'test31'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+; RECIP-LABEL: 'test31'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -256
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
+; SIZE-LABEL: 'test31'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -256
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %v
 ;
   %a = getelementptr inbounds i32, ptr %p, i32 -256
   %v = load i32, ptr %a
@@ -346,10 +502,15 @@ define i32 @test31(ptr %p) {
 }
 
 define i64 @test32(ptr %p) {
-; CHECK-LABEL: 'test32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -256
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+; RECIP-LABEL: 'test32'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -256
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
+; SIZE-LABEL: 'test32'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -256
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %v
 ;
   %a = getelementptr inbounds i64, ptr %p, i32 -256
   %v = load i64, ptr %a
@@ -357,10 +518,15 @@ define i64 @test32(ptr %p) {
 }
 
 define i8 @test33(ptr %p) {
-; CHECK-LABEL: 'test33'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -512
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+; RECIP-LABEL: 'test33'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -512
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
+; SIZE-LABEL: 'test33'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -512
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i8 %v
 ;
   %a = getelementptr inbounds i8, ptr %p, i32 -512
   %v = load i8, ptr %a
@@ -368,10 +534,15 @@ define i8 @test33(ptr %p) {
 }
 
 define i16 @test34(ptr %p) {
-; CHECK-LABEL: 'test34'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -512
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+; RECIP-LABEL: 'test34'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -512
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
+; SIZE-LABEL: 'test34'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -512
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i16 %v
 ;
   %a = getelementptr inbounds i16, ptr %p, i32 -512
   %v = load i16, ptr %a
@@ -379,10 +550,15 @@ define i16 @test34(ptr %p) {
 }
 
 define i32 @test35(ptr %p) {
-; CHECK-LABEL: 'test35'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -512
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+; RECIP-LABEL: 'test35'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -512
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
+; SIZE-LABEL: 'test35'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -512
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %v
 ;
   %a = getelementptr inbounds i32, ptr %p, i32 -512
   %v = load i32, ptr %a
@@ -390,10 +566,15 @@ define i32 @test35(ptr %p) {
 }
 
 define i64 @test36(ptr %p) {
-; CHECK-LABEL: 'test36'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -512
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+; RECIP-LABEL: 'test36'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -512
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
+; SIZE-LABEL: 'test36'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -512
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 %v
 ;
   %a = getelementptr inbounds i64, ptr %p, i32 -512
   %v = load i64, ptr %a

diff  --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
index 013d4876c9f37..ab048a8021381 100644
--- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
@@ -1,32 +1,57 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput | FileCheck %s --check-prefixes=CHECK,CHECK-F16
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -cost-kind=throughput | FileCheck %s --check-prefixes=RECIP,RECIP-NOF16
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -mattr=+fullfp16 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output -cost-kind=throughput | FileCheck %s --check-prefixes=RECIP,RECIP-F16
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -cost-kind=throughput | FileCheck %s --check-prefixes=SIZE,SIZE-NOF16
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -mattr=+fullfp16 -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -cost-kind=throughput | FileCheck %s --check-prefixes=SIZE,SIZE-F16
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @umin() {
-; CHECK-LABEL: 'umin'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umin.v1i8(<1 x i8> undef, <1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umin.v3i8(<3 x i8> undef, <3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'umin'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umin.v1i8(<1 x i8> undef, <1 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umin.v3i8(<3 x i8> undef, <3 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'umin'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umin.v1i8(<1 x i8> undef, <1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umin.v3i8(<3 x i8> undef, <3 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %i8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
   %i16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
@@ -52,28 +77,51 @@ define void @umin() {
 }
 
 define void @umax() {
-; CHECK-LABEL: 'umax'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umax.v1i8(<1 x i8> undef, <1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umax.v3i8(<3 x i8> undef, <3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'umax'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umax.v1i8(<1 x i8> undef, <1 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umax.v3i8(<3 x i8> undef, <3 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'umax'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umax.v1i8(<1 x i8> undef, <1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umax.v3i8(<3 x i8> undef, <3 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %i8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
   %i16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
@@ -99,28 +147,51 @@ define void @umax() {
 }
 
 define void @smin() {
-; CHECK-LABEL: 'smin'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smin.v3i8(<3 x i8> undef, <3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'smin'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smin.v3i8(<3 x i8> undef, <3 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'smin'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smin.v3i8(<3 x i8> undef, <3 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %i8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
   %i16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
@@ -146,28 +217,51 @@ define void @smin() {
 }
 
 define void @smax() {
-; CHECK-LABEL: 'smax'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smax.v3i8(<3 x i8> undef, <3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'smax'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smax.v3i8(<3 x i8> undef, <3 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'smax'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smax.v3i8(<3 x i8> undef, <3 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %i8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
   %i16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
@@ -193,21 +287,37 @@ define void @smax() {
 }
 
 define void @minnum16() {
-; CHECK-NOF16-LABEL: 'minnum16'
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minnum.f16(half undef, half undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-F16-LABEL: 'minnum16'
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minnum.f16(half undef, half undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-NOF16-LABEL: 'minnum16'
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minnum.f16(half undef, half undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIP-F16-LABEL: 'minnum16'
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minnum.f16(half undef, half undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-NOF16-LABEL: 'minnum16'
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minnum.f16(half undef, half undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-F16-LABEL: 'minnum16'
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minnum.f16(half undef, half undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %f16 = call half @llvm.minnum.f16(half undef, half undef)
   %V2f16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
@@ -218,21 +328,37 @@ define void @minnum16() {
 }
 
 define void @maxnum16() {
-; CHECK-NOF16-LABEL: 'maxnum16'
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maxnum.f16(half undef, half undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-F16-LABEL: 'maxnum16'
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maxnum.f16(half undef, half undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-NOF16-LABEL: 'maxnum16'
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maxnum.f16(half undef, half undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIP-F16-LABEL: 'maxnum16'
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maxnum.f16(half undef, half undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-NOF16-LABEL: 'maxnum16'
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maxnum.f16(half undef, half undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-F16-LABEL: 'maxnum16'
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maxnum.f16(half undef, half undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %f16 = call half @llvm.maxnum.f16(half undef, half undef)
   %V2f16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
@@ -243,15 +369,25 @@ define void @maxnum16() {
 }
 
 define void @minnum() {
-; CHECK-LABEL: 'minnum'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.minnum.f32(float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'minnum'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.minnum.f32(float undef, float undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'minnum'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.minnum.f32(float undef, float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %f32 = call float @llvm.minnum.f32(float undef, float undef)
   %f64 = call double @llvm.minnum.f64(double undef, double undef)
@@ -264,15 +400,25 @@ define void @minnum() {
 }
 
 define void @maxnum() {
-; CHECK-LABEL: 'maxnum'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.maxnum.f32(float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'maxnum'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.maxnum.f32(float undef, float undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'maxnum'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.maxnum.f32(float undef, float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %f32 = call float @llvm.maxnum.f32(float undef, float undef)
   %f64 = call double @llvm.maxnum.f64(double undef, double undef)
@@ -286,21 +432,37 @@ define void @maxnum() {
 
 
 define void @minimum16() {
-; CHECK-NOF16-LABEL: 'minimum16'
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-F16-LABEL: 'minimum16'
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-NOF16-LABEL: 'minimum16'
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIP-F16-LABEL: 'minimum16'
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-NOF16-LABEL: 'minimum16'
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-F16-LABEL: 'minimum16'
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %f16 = call half @llvm.minimum.f16(half undef, half undef)
   %V2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
@@ -311,21 +473,37 @@ define void @minimum16() {
 }
 
 define void @maximum16() {
-; CHECK-NOF16-LABEL: 'maximum16'
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; CHECK-F16-LABEL: 'maximum16'
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-NOF16-LABEL: 'maximum16'
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+; RECIP-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIP-F16-LABEL: 'maximum16'
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+; RECIP-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-NOF16-LABEL: 'maximum16'
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+; SIZE-NOF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-F16-LABEL: 'maximum16'
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+; SIZE-F16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %f16 = call half @llvm.maximum.f16(half undef, half undef)
   %V2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
@@ -336,15 +514,25 @@ define void @maximum16() {
 }
 
 define void @minimum() {
-; CHECK-LABEL: 'minimum'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.minimum.f32(float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.minimum.f64(double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.minimum.v2f32(<2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'minimum'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.minimum.f32(float undef, float undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.minimum.f64(double undef, double undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.minimum.v2f32(<2 x float> undef, <2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'minimum'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.minimum.f32(float undef, float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.minimum.f64(double undef, double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.minimum.v2f32(<2 x float> undef, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %f32 = call float @llvm.minimum.f32(float undef, float undef)
   %f64 = call double @llvm.minimum.f64(double undef, double undef)
@@ -357,15 +545,25 @@ define void @minimum() {
 }
 
 define void @maximum() {
-; CHECK-LABEL: 'maximum'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.maximum.f32(float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.maximum.f64(double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.maximum.v2f32(<2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'maximum'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.maximum.f32(float undef, float undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.maximum.f64(double undef, double undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.maximum.v2f32(<2 x float> undef, <2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'maximum'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = call float @llvm.maximum.f32(float undef, float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.maximum.f64(double undef, double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f32 = call <2 x float> @llvm.maximum.v2f32(<2 x float> undef, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4f32 = call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4f64 = call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %f32 = call float @llvm.maximum.f32(float undef, float undef)
   %f64 = call double @llvm.maximum.f64(double undef, double undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/mul.ll b/llvm/test/Analysis/CostModel/AArch64/mul.ll
index a362edd014ffe..f31248d53f8ff 100644
--- a/llvm/test/Analysis/CostModel/AArch64/mul.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/mul.ll
@@ -1,131 +1,188 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=THROUGHPUT
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=COST
 
 ; Verify the cost of (vector) multiply instructions.
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define <2 x i8> @t1(<2 x i8> %a, <2 x i8> %b)  {
-; THROUGHPUT-LABEL: 't1'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <2 x i8> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i8> %1
+; RECIP-LABEL: 't1'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <2 x i8> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i8> %1
+;
+; COST-LABEL: 't1'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <2 x i8> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i8> %1
 ;
   %1 = mul <2 x i8> %a, %b
   ret <2 x i8> %1
 }
 
 define <4 x i8> @t2(<4 x i8> %a, <4 x i8> %b)  {
-; THROUGHPUT-LABEL: 't2'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <4 x i8> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %1
+; RECIP-LABEL: 't2'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <4 x i8> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %1
+;
+; COST-LABEL: 't2'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <4 x i8> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i8> %1
 ;
   %1 = mul <4 x i8> %a, %b
   ret <4 x i8> %1
 }
 
 define <8 x i8> @t3(<8 x i8> %a, <8 x i8> %b)  {
-; THROUGHPUT-LABEL: 't3'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <8 x i8> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %1
+; RECIP-LABEL: 't3'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <8 x i8> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %1
+;
+; COST-LABEL: 't3'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <8 x i8> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %1
 ;
   %1 = mul <8 x i8> %a, %b
   ret <8 x i8> %1
 }
 
 define <16 x i8> @t4(<16 x i8> %a, <16 x i8> %b)  {
-; THROUGHPUT-LABEL: 't4'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <16 x i8> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
+; RECIP-LABEL: 't4'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <16 x i8> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
+;
+; COST-LABEL: 't4'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <16 x i8> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %1
 ;
   %1 = mul <16 x i8> %a, %b
   ret <16 x i8> %1
 }
 
 define <32 x i8> @t5(<32 x i8> %a, <32 x i8> %b)  {
-; THROUGHPUT-LABEL: 't5'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = mul <32 x i8> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
+; RECIP-LABEL: 't5'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = mul <32 x i8> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
+;
+; COST-LABEL: 't5'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <32 x i8> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %1
 ;
   %1 = mul <32 x i8> %a, %b
   ret <32 x i8> %1
 }
 
 define <2 x i16> @t6(<2 x i16> %a, <2 x i16> %b)  {
-; THROUGHPUT-LABEL: 't6'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <2 x i16> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i16> %1
+; RECIP-LABEL: 't6'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <2 x i16> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i16> %1
+;
+; COST-LABEL: 't6'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <2 x i16> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i16> %1
 ;
   %1 = mul <2 x i16> %a, %b
   ret <2 x i16> %1
 }
 
 define <4 x i16> @t7(<4 x i16> %a, <4 x i16> %b)  {
-; THROUGHPUT-LABEL: 't7'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <4 x i16> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %1
+; RECIP-LABEL: 't7'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <4 x i16> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %1
+;
+; COST-LABEL: 't7'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <4 x i16> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %1
 ;
   %1 = mul <4 x i16> %a, %b
   ret <4 x i16> %1
 }
 
 define <8 x i16> @t8(<8 x i16> %a, <8 x i16> %b)  {
-; THROUGHPUT-LABEL: 't8'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <8 x i16> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
+; RECIP-LABEL: 't8'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <8 x i16> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
+;
+; COST-LABEL: 't8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <8 x i16> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %1
 ;
   %1 = mul <8 x i16> %a, %b
   ret <8 x i16> %1
 }
 
 define <16 x i16> @t9(<16 x i16> %a, <16 x i16> %b)  {
-; THROUGHPUT-LABEL: 't9'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = mul <16 x i16> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
+; RECIP-LABEL: 't9'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = mul <16 x i16> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
+;
+; COST-LABEL: 't9'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <16 x i16> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %1
 ;
   %1 = mul <16 x i16> %a, %b
   ret <16 x i16> %1
 }
 
 define <2 x i32> @t10(<2 x i32> %a, <2 x i32> %b)  {
-; THROUGHPUT-LABEL: 't10'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <2 x i32> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1
+; RECIP-LABEL: 't10'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <2 x i32> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %1
+;
+; COST-LABEL: 't10'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <2 x i32> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %1
 ;
   %1 = mul <2 x i32> %a, %b
   ret <2 x i32> %1
 }
 
 define <4 x i32> @t11(<4 x i32> %a, <4 x i32> %b)  {
-; THROUGHPUT-LABEL: 't11'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <4 x i32> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
+; RECIP-LABEL: 't11'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <4 x i32> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
+;
+; COST-LABEL: 't11'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <4 x i32> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %1
 ;
   %1 = mul <4 x i32> %a, %b
   ret <4 x i32> %1
 }
 
 define <8 x i32> @t12(<8 x i32> %a, <8 x i32> %b)  {
-; THROUGHPUT-LABEL: 't12'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = mul <8 x i32> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
+; RECIP-LABEL: 't12'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = mul <8 x i32> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
+;
+; COST-LABEL: 't12'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul <8 x i32> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %1
 ;
   %1 = mul <8 x i32> %a, %b
   ret <8 x i32> %1
 }
 
 define <2 x i64> @t13(<2 x i64> %a, <2 x i64> %b)  {
-; THROUGHPUT-LABEL: 't13'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %1 = mul nsw <2 x i64> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %1
+; RECIP-LABEL: 't13'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %1 = mul nsw <2 x i64> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %1
+;
+; COST-LABEL: 't13'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul nsw <2 x i64> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %1
 ;
   %1 = mul nsw <2 x i64> %a, %b
   ret <2 x i64> %1
 }
 
 define <4 x i64> @t14(<4 x i64> %a, <4 x i64> %b)  {
-; THROUGHPUT-LABEL: 't14'
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %1 = mul nsw <4 x i64> %a, %b
-; THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
+; RECIP-LABEL: 't14'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %1 = mul nsw <4 x i64> %a, %b
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
+;
+; COST-LABEL: 't14'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = mul nsw <4 x i64> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %1
 ;
   %1 = mul nsw <4 x i64> %a, %b
   ret <4 x i64> %1

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll
index 242955cc2d099..7341363f36d18 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll
@@ -1,27 +1,47 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=COST
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @reduce() {
-; CHECK-LABEL: 'reduce'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'reduce'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; COST-LABEL: 'reduce'
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
   %V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
index fd7539b824cf8..632cc4dc3e7d8 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
@@ -1,34 +1,61 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=COST
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @reduce() {
-; CHECK-LABEL: 'reduce'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'reduce'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; COST-LABEL: 'reduce'
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
index a95542f690173..79be838ed2f9e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
@@ -1,172 +1,135 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu < %s | FileCheck %s
-; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=FP16
-; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+bf16 < %s | FileCheck %s --check-prefix=BF16
+; RUN: opt -passes='print<cost-model>' 2>&1 -cost-kind=throughput -disable-output -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=RECIP,RECIP-NOFP16
+; RUN: opt -passes='print<cost-model>' 2>&1 -cost-kind=throughput -disable-output -mtriple=aarch64-linux-gnu -mattr=+fullfp16 < %s | FileCheck %s --check-prefixes=RECIP,RECIP-FP16
+; RUN: opt -passes='print<cost-model>' 2>&1 -cost-kind=throughput -disable-output -mtriple=aarch64-linux-gnu -mattr=+bf16 < %s | FileCheck %s --check-prefixes=RECIP,RECIPBF16
+; RUN: opt -passes='print<cost-model>' 2>&1 -cost-kind=code-size -disable-output -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=SIZE,SIZE-NOFP16
+; RUN: opt -passes='print<cost-model>' 2>&1 -cost-kind=code-size -disable-output -mtriple=aarch64-linux-gnu -mattr=+fullfp16 < %s | FileCheck %s --check-prefixes=SIZE,SIZE-FP16
+; RUN: opt -passes='print<cost-model>' 2>&1 -cost-kind=code-size -disable-output -mtriple=aarch64-linux-gnu -mattr=+bf16 < %s | FileCheck %s --check-prefixes=SIZE,SIZE-BF16
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @strict_fp_reductions() {
-; CHECK-LABEL: 'strict_fp_reductions'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; FP16-LABEL: 'strict_fp_reductions'
-; FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; BF16-LABEL: 'strict_fp_reductions'
-; BF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'strict_fp_reductions'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'strict_fp_reductions'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
-  %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
-  %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
-  %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
   %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
   %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
   %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
   %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
   %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
-  %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4f8(bfloat 0.0, <4 x bfloat> undef)
   %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+  ret void
+}
 
+define void @strict_fp_reductions_fp16() {
+; RECIP-NOFP16-LABEL: 'strict_fp_reductions_fp16'
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIP-FP16-LABEL: 'strict_fp_reductions_fp16'
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIPBF16-LABEL: 'strict_fp_reductions_fp16'
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'strict_fp_reductions_fp16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+  %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
+  %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+  %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
   ret void
 }
 
+define void @strict_fp_reductions_bf16() {
+; RECIP-NOFP16-LABEL: 'strict_fp_reductions_bf16'
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIP-FP16-LABEL: 'strict_fp_reductions_bf16'
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIPBF16-LABEL: 'strict_fp_reductions_bf16'
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'strict_fp_reductions_bf16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fadd_v4f8 = call bfloat @llvm.vector.reduce.fadd.v4f8(bfloat 0.0, <4 x bfloat> undef)
+  ret void
+}
 
 define void @fast_fp_reductions() {
-; CHECK-LABEL: 'fast_fp_reductions'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; FP16-LABEL: 'fast_fp_reductions'
-; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; FP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; BF16-LABEL: 'fast_fp_reductions'
-; BF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; BF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'fast_fp_reductions'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'fast_fp_reductions'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
-  %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
-
-  %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
-  %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
-
-  %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
-  %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
-
-  %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
-  %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
-
-  %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0.0, <11 x half> undef)
-  %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0.0, <13 x half> undef)
-
   %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
   %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
 
@@ -188,12 +151,129 @@ define void @fast_fp_reductions() {
   %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.0, <7 x double> undef)
   %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.0, <9 x double> undef)
 
-  %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4f8(bfloat -0.0, <4 x bfloat> undef)
   %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
 
   ret void
 }
 
+define void @fast_fp_reductions_fp16() {
+; RECIP-NOFP16-LABEL: 'fast_fp_reductions_fp16'
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIP-FP16-LABEL: 'fast_fp_reductions_fp16'
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIPBF16-LABEL: 'fast_fp_reductions_fp16'
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-NOFP16-LABEL: 'fast_fp_reductions_fp16'
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; SIZE-NOFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SIZE-FP16-LABEL: 'fast_fp_reductions_fp16'
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; SIZE-FP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SIZE-BF16-LABEL: 'fast_fp_reductions_fp16'
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef)
+; SIZE-BF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+  %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+
+  %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
+  %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
+
+  %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+  %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+
+  %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+  %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+
+  %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0.0, <11 x half> undef)
+  %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0.0, <13 x half> undef)
+
+  ret void
+}
+
+define void @fast_fp_reductions_bf16() {
+; RECIP-NOFP16-LABEL: 'fast_fp_reductions_bf16'
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; RECIP-NOFP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIP-FP16-LABEL: 'fast_fp_reductions_bf16'
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; RECIP-FP16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RECIPBF16-LABEL: 'fast_fp_reductions_bf16'
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; RECIPBF16-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'fast_fp_reductions_bf16'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4f8(bfloat -0.0, <4 x bfloat> undef)
+  ret void
+}
+
 declare bfloat @llvm.vector.reduce.fadd.v4f8(bfloat, <4 x bfloat>)
 declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)
 

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
index cf03026ceb75a..fc4f9adc0566f 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-F16
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -mattr=+fullfp16 -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,CHECK-F16
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
index 7215b6cbc727a..ba747b8321ee8 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
@@ -1,34 +1,61 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=COST
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @reduce() {
-; CHECK-LABEL: 'reduce'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'reduce'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; COST-LABEL: 'reduce'
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
index eeb32d6d27a50..b916c38ad220f 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
@@ -1,34 +1,61 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=COST
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @reduce() {
-; CHECK-LABEL: 'reduce'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'reduce'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; COST-LABEL: 'reduce'
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 
   %V1   = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/select.ll b/llvm/test/Analysis/CostModel/AArch64/select.ll
index a69d02bb562ef..07e4cd4c906d0 100644
--- a/llvm/test/Analysis/CostModel/AArch64/select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/select.ll
@@ -1,49 +1,49 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-THROUGHPUT
-; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=aarch64-- | FileCheck %s --check-prefix=CHECK-SIZE
+; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=aarch64-- | FileCheck %s --check-prefix=SIZE
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 
 define void @select() {
     ; Scalar values
-; CHECK-THROUGHPUT-LABEL: 'select'
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'select'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; CHECK-SIZE-LABEL: 'select'
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; SIZE-LABEL: 'select'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = select i1 undef, i16 undef, i16 undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3 = select i1 undef, i32 undef, i32 undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2float = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4float = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2double = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4half = select <4 x i1> undef, <4 x half> undef, <4 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8half = select <8 x i1> undef, <8 x half> undef, <8 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v1 = select i1 undef, i8 undef, i8 undef
   %v2 = select i1 undef, i16 undef, i16 undef

diff  --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll
index 469caac08c9d3..86d2cb0038f4a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll
@@ -1,22 +1,37 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=COST
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @broadcast() {
-; CHECK-LABEL: 'broadcast'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'broadcast'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; COST-LABEL: 'broadcast'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v7 = shufflevector <2 x i8> undef, <2 x i8>undef, <2 x i32> zeroinitializer
   %v8 = shufflevector <4 x i8> undef, <4 x i8>undef, <4 x i32> zeroinitializer

diff  --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll
index 7a11d6ff0cffd..e1bf345a09da1 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=RECIP
+; RUN: opt < %s -mtriple=aarch64-linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=COST
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -9,34 +10,63 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 ;; Reverse shuffles should be lowered to vrev and possibly a vext (for quadwords, on neon)
 define void @reverse() {
-; CHECK-LABEL: 'reverse'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'reverse'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <2 x i32> <i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; COST-LABEL: 'reverse'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <2 x i32> <i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
   %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -76,32 +106,59 @@ define void @reverse() {
 }
 
 define void @vrev64() {
-; CHECK-LABEL: 'vrev64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64 = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'vrev64'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64 = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; COST-LABEL: 'vrev64'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64 = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
@@ -139,24 +196,43 @@ define void @vrev64() {
 }
 
 define void @vrev32() {
-; CHECK-LABEL: 'vrev32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64 = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'vrev32'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64 = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; COST-LABEL: 'vrev32'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f64 = shufflevector <8 x double> undef, <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
@@ -186,16 +262,27 @@ define void @vrev32() {
 }
 
 define void @vrev16() {
-; CHECK-LABEL: 'vrev16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RECIP-LABEL: 'vrev16'
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; COST-LABEL: 'vrev16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; COST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; COST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; COST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f64 = shufflevector <16 x double> undef, <16 x double> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>