[llvm] d663166 - [CostModel][X86] Reduce cost of v2i64 icmp base cost on SSE2 targets

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 30 01:13:19 PDT 2022


Author: Simon Pilgrim
Date: 2022-03-30T09:11:55+01:00
New Revision: d663166acbe59e4f17e406302552a1a9005204e8

URL: https://github.com/llvm/llvm-project/commit/d663166acbe59e4f17e406302552a1a9005204e8
DIFF: https://github.com/llvm/llvm-project/commit/d663166acbe59e4f17e406302552a1a9005204e8.diff

LOG: [CostModel][X86] Reduce cost of v2i64 icmp base cost on SSE2 targets

Based off the script from D103695, we were exaggerating the cost of the v2i64 comparison expansion using instruction count instead of effective throughput

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/arith-overflow.ll
    llvm/test/Analysis/CostModel/X86/arith-sminmax.ll
    llvm/test/Analysis/CostModel/X86/arith-ssat.ll
    llvm/test/Analysis/CostModel/X86/arith-uminmax.ll
    llvm/test/Analysis/CostModel/X86/arith-usat.ll
    llvm/test/Analysis/CostModel/X86/fshl.ll
    llvm/test/Analysis/CostModel/X86/fshr.ll
    llvm/test/Analysis/CostModel/X86/icmp.ll
    llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
    llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
    llvm/test/Analysis/CostModel/X86/reduce-smax.ll
    llvm/test/Analysis/CostModel/X86/reduce-smin.ll
    llvm/test/Analysis/CostModel/X86/reduce-umax.ll
    llvm/test/Analysis/CostModel/X86/reduce-umin.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index c6801a6c0b5c8..abefa62c9d52c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2695,7 +2695,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
   static const CostTblEntry SSE2CostTbl[] = {
     { ISD::SETCC,   MVT::v2f64,   2 },
     { ISD::SETCC,   MVT::f64,     1 },
-    { ISD::SETCC,   MVT::v2i64,   8 },
+    { ISD::SETCC,   MVT::v2i64,   5 }, // pcmpeqd/pcmpgtd expansion
     { ISD::SETCC,   MVT::v4i32,   1 },
     { ISD::SETCC,   MVT::v8i16,   1 },
     { ISD::SETCC,   MVT::v16i8,   1 },

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index 34ed915599ef2..c5aa9a49daa8f 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -38,9 +38,9 @@ declare {<64 x i8>, <64 x i1>}  @llvm.sadd.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @sadd(i32 %arg) {
 ; SSSE3-LABEL: 'sadd'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -276,9 +276,9 @@ declare {<64 x i8>, <64 x i1>}  @llvm.uadd.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @uadd(i32 %arg) {
 ; SSSE3-LABEL: 'uadd'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -514,9 +514,9 @@ declare {<64 x i8>, <64 x i1>}  @llvm.ssub.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @ssub(i32 %arg) {
 ; SSSE3-LABEL: 'ssub'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -752,9 +752,9 @@ declare {<64 x i8>, <64 x i1>}  @llvm.usub.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @usub(i32 %arg) {
 ; SSSE3-LABEL: 'usub'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -990,9 +990,9 @@ declare {<64 x i8>, <64 x i1>}  @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @smul(i32 %arg) {
 ; SSSE3-LABEL: 'smul'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1228,9 +1228,9 @@ declare {<64 x i8>, <64 x i1>}  @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x
 define i32 @umul(i32 %arg) {
 ; SSSE3-LABEL: 'umul'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll b/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll
index 799638bcce1e7..2d74f07ba9b41 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-sminmax.ll
@@ -31,9 +31,9 @@ declare <64 x i8>  @llvm.smax.v64i8(<64 x i8>, <64 x i8>)
 define i32 @smax(i32 %arg) {
 ; SSE2-LABEL: 'smax'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -50,9 +50,9 @@ define i32 @smax(i32 %arg) {
 ;
 ; SSSE3-LABEL: 'smax'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -227,9 +227,9 @@ declare <64 x i8>  @llvm.smin.v64i8(<64 x i8>, <64 x i8>)
 define i32 @smin(i32 %arg) {
 ; SSE2-LABEL: 'smin'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -246,9 +246,9 @@ define i32 @smin(i32 %arg) {
 ;
 ; SSSE3-LABEL: 'smin'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll
index b2236a30b62a2..64d2cd4a2f28e 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-ssat.ll
@@ -37,9 +37,9 @@ declare <64 x i8>  @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
 define i32 @add(i32 %arg) {
 ; SSSE3-LABEL: 'add'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 89 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -271,9 +271,9 @@ declare <64 x i8>  @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>)
 define i32 @sub(i32 %arg) {
 ; SSSE3-LABEL: 'sub'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 89 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll b/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll
index 41cc68fadf2cf..71cdb0d8ef498 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-uminmax.ll
@@ -31,9 +31,9 @@ declare <64 x i8>  @llvm.umax.v64i8(<64 x i8>, <64 x i8>)
 define i32 @umax(i32 %arg) {
 ; SSE2-LABEL: 'umax'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -50,9 +50,9 @@ define i32 @umax(i32 %arg) {
 ;
 ; SSSE3-LABEL: 'umax'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -227,9 +227,9 @@ declare <64 x i8>  @llvm.umin.v64i8(<64 x i8>, <64 x i8>)
 define i32 @umin(i32 %arg) {
 ; SSE2-LABEL: 'umin'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -246,9 +246,9 @@ define i32 @umin(i32 %arg) {
 ;
 ; SSSE3-LABEL: 'umin'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-usat.ll b/llvm/test/Analysis/CostModel/X86/arith-usat.ll
index 88f2b6358d33f..398c12582ff3c 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-usat.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-usat.ll
@@ -38,9 +38,9 @@ declare <64 x i8>  @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>)
 define i32 @add(i32 %arg) {
 ; SSSE3-LABEL: 'add'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -272,9 +272,9 @@ declare <64 x i8>  @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>)
 define i32 @sub(i32 %arg) {
 ; SSSE3-LABEL: 'sub'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll
index 810d4867c97bc..ca0d549ee8557 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl.ll
@@ -22,9 +22,9 @@ target triple = "x86_64-apple-macosx10.8.0"
 define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) {
 ; SSSE3-LABEL: 'var_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 %c64)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'var_funnel_i64'
@@ -312,9 +312,9 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatvar_funnel_i64'
@@ -678,9 +678,9 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'constant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 1, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 1, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'constant_funnel_i64'
@@ -966,9 +966,9 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'

diff  --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll
index 8b634e7b74bac..f5d7230e44dbc 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr.ll
@@ -22,9 +22,9 @@ target triple = "x86_64-apple-macosx10.8.0"
 define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) {
 ; SSSE3-LABEL: 'var_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'var_funnel_i64'
@@ -312,9 +312,9 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatvar_funnel_i64'
@@ -678,9 +678,9 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @constant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'constant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 1, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 1, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 1, i64 7, i64 15, i64 31>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'constant_funnel_i64'
@@ -966,9 +966,9 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'

diff  --git a/llvm/test/Analysis/CostModel/X86/icmp.ll b/llvm/test/Analysis/CostModel/X86/icmp.ll
index a51f54513a05a..c6e2f4e750886 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp.ll
@@ -33,10 +33,10 @@ define i32 @cmp_int_eq(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_eq'
@@ -56,10 +56,10 @@ define i32 @cmp_int_eq(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_eq'
@@ -79,10 +79,10 @@ define i32 @cmp_int_eq(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_eq'
@@ -102,10 +102,10 @@ define i32 @cmp_int_eq(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_eq'
@@ -337,10 +337,10 @@ define i32 @cmp_int_ne(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_ne'
@@ -360,10 +360,10 @@ define i32 @cmp_int_ne(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_ne'
@@ -383,10 +383,10 @@ define i32 @cmp_int_ne(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_ne'
@@ -406,10 +406,10 @@ define i32 @cmp_int_ne(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_ne'
@@ -641,10 +641,10 @@ define i32 @cmp_int_sge(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_sge'
@@ -664,10 +664,10 @@ define i32 @cmp_int_sge(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_sge'
@@ -687,10 +687,10 @@ define i32 @cmp_int_sge(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_sge'
@@ -710,10 +710,10 @@ define i32 @cmp_int_sge(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_sge'
@@ -945,10 +945,10 @@ define i32 @cmp_int_uge(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_uge'
@@ -968,10 +968,10 @@ define i32 @cmp_int_uge(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_uge'
@@ -991,10 +991,10 @@ define i32 @cmp_int_uge(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_uge'
@@ -1014,10 +1014,10 @@ define i32 @cmp_int_uge(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_uge'
@@ -1249,10 +1249,10 @@ define i32 @cmp_int_sgt(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_sgt'
@@ -1272,10 +1272,10 @@ define i32 @cmp_int_sgt(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_sgt'
@@ -1295,10 +1295,10 @@ define i32 @cmp_int_sgt(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_sgt'
@@ -1318,10 +1318,10 @@ define i32 @cmp_int_sgt(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_sgt'
@@ -1553,10 +1553,10 @@ define i32 @cmp_int_ugt(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_ugt'
@@ -1576,10 +1576,10 @@ define i32 @cmp_int_ugt(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_ugt'
@@ -1599,10 +1599,10 @@ define i32 @cmp_int_ugt(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_ugt'
@@ -1622,10 +1622,10 @@ define i32 @cmp_int_ugt(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_ugt'
@@ -1857,10 +1857,10 @@ define i32 @cmp_int_sle(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_sle'
@@ -1880,10 +1880,10 @@ define i32 @cmp_int_sle(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_sle'
@@ -1903,10 +1903,10 @@ define i32 @cmp_int_sle(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_sle'
@@ -1926,10 +1926,10 @@ define i32 @cmp_int_sle(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_sle'
@@ -2161,10 +2161,10 @@ define i32 @cmp_int_ule(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_ule'
@@ -2184,10 +2184,10 @@ define i32 @cmp_int_ule(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_ule'
@@ -2207,10 +2207,10 @@ define i32 @cmp_int_ule(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_ule'
@@ -2230,10 +2230,10 @@ define i32 @cmp_int_ule(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_ule'
@@ -2465,10 +2465,10 @@ define i32 @cmp_int_slt(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_slt'
@@ -2488,10 +2488,10 @@ define i32 @cmp_int_slt(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_slt'
@@ -2511,10 +2511,10 @@ define i32 @cmp_int_slt(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_slt'
@@ -2534,10 +2534,10 @@ define i32 @cmp_int_slt(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_slt'
@@ -2769,10 +2769,10 @@ define i32 @cmp_int_ult(i32 %arg) {
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_ult'
@@ -2792,10 +2792,10 @@ define i32 @cmp_int_ult(i32 %arg) {
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_ult'
@@ -2815,10 +2815,10 @@ define i32 @cmp_int_ult(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_ult'
@@ -2838,10 +2838,10 @@ define i32 @cmp_int_ult(i32 %arg) {
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_ult'

diff  --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
index 07cabc11e817e..36d027bb6956b 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
@@ -1024,7 +1024,7 @@ define i32 @masked_compressstore() {
 
 define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
 ; SSE2-LABEL: 'test1'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;

diff  --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index 3284077f948f5..a6fb78f6c7e41 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -1408,7 +1408,7 @@ define i32 @masked_compressstore() {
 
 define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
 ; SSE2-LABEL: 'test1'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;

diff  --git a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
index 721c571a729c0..a7e5808efec3f 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
@@ -12,26 +12,26 @@
 define i32 @reduce_i64(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'reduce_i64'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'reduce_i64'

diff  --git a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
index 6b2732dd19015..49aee00bb9f9c 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
@@ -12,26 +12,26 @@
 define i32 @reduce_i64(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'reduce_i64'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'reduce_i64'

diff  --git a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
index c4ed873d19caf..2979197e05e16 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
@@ -12,26 +12,26 @@
 define i32 @reduce_i64(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'reduce_i64'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'reduce_i64'

diff  --git a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
index 14c44287838ac..356a99eb0f243 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
@@ -12,26 +12,26 @@
 define i32 @reduce_i64(i32 %arg) {
 ; SSE2-LABEL: 'reduce_i64'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'reduce_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'reduce_i64'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'reduce_i64'


        


More information about the llvm-commits mailing list