[llvm] f8fa042 - [CostModel][X86] Add CostKinds handling for vector integer comparisons

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 16 05:21:28 PDT 2022


Author: Simon Pilgrim
Date: 2022-09-16T13:03:41+01:00
New Revision: f8fa04295faac7140c46847c4ea86a8d624919ca

URL: https://github.com/llvm/llvm-project/commit/f8fa04295faac7140c46847c4ea86a8d624919ca
DIFF: https://github.com/llvm/llvm-project/commit/f8fa04295faac7140c46847c4ea86a8d624919ca.diff

LOG: [CostModel][X86] Add CostKinds handling for vector integer comparisons

These were based off a mixture of vector integer add/sub costs and the numbers from the 'cost-tables vs llvm-mca' script from D103695 - the extra costs for different predicates are still proving tricky to implement, but I've gotten most costs to within +/1 now - the AVX512 are tricky as we still don't handle predicate results properly, so most of these were done by hand.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/arith-overflow.ll
    llvm/test/Analysis/CostModel/X86/fshl.ll
    llvm/test/Analysis/CostModel/X86/fshr.ll
    llvm/test/Analysis/CostModel/X86/icmp-codesize.ll
    llvm/test/Analysis/CostModel/X86/icmp-latency.ll
    llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/icmp.ll
    llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
    llvm/test/Analysis/CostModel/X86/reduce-smax.ll
    llvm/test/Analysis/CostModel/X86/reduce-smin.ll
    llvm/test/Analysis/CostModel/X86/reduce-umax.ll
    llvm/test/Analysis/CostModel/X86/reduce-umin.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index f2fdc541217f9..133c933b633a8 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3104,8 +3104,10 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
   };
 
   static const CostKindTblEntry AVX512BWCostTbl[] = {
-    { ISD::SETCC,   MVT::v32i16,  { 1 } },
-    { ISD::SETCC,   MVT::v64i8,   { 1 } },
+    { ISD::SETCC,   MVT::v32i16,  { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v16i16,  { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v64i8,   { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v32i8,   { 1, 1, 1, 1 } },
 
     { ISD::SELECT,  MVT::v32i16,  { 1, 1, 1, 1 } },
     { ISD::SELECT,  MVT::v64i8,   { 1, 1, 1, 1 } },
@@ -3117,8 +3119,13 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     { ISD::SETCC,   MVT::v16f32,  { 1, 4, 1, 1 } },
     { ISD::SETCC,   MVT::v8f32,   { 1, 4, 1, 1 } },
 
-    { ISD::SETCC,   MVT::v8i64,   { 1 } },
-    { ISD::SETCC,   MVT::v16i32,  { 1 } },
+    { ISD::SETCC,   MVT::v8i64,   { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v4i64,   { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v2i64,   { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v16i32,  { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v8i32,   { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v32i16,  { 3, 7, 5, 5 } },
+    { ISD::SETCC,   MVT::v64i8,   { 3, 7, 5, 5 } },
 
     { ISD::SELECT,  MVT::v8i64,   { 1, 1, 1, 1 } },
     { ISD::SELECT,  MVT::v4i64,   { 1, 1, 1, 1 } },
@@ -3135,9 +3142,6 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     { ISD::SELECT,  MVT::v4f32,   { 1, 1, 1, 1 } },
     { ISD::SELECT,  MVT::f32  ,   { 1, 1, 1, 1 } },
 
-    { ISD::SETCC,   MVT::v32i16,  { 2 } }, // FIXME: should probably be 4
-    { ISD::SETCC,   MVT::v64i8,   { 2 } }, // FIXME: should probably be 4
-
     { ISD::SELECT,  MVT::v32i16,  { 2, 2, 4, 4 } },
     { ISD::SELECT,  MVT::v16i16,  { 1, 1, 1, 1 } },
     { ISD::SELECT,  MVT::v8i16,   { 1, 1, 1, 1 } },
@@ -3154,10 +3158,10 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     { ISD::SETCC,   MVT::v4f32,   { 1, 4, 1, 1 } },
     { ISD::SETCC,   MVT::f32,     { 1, 4, 1, 1 } },
 
-    { ISD::SETCC,   MVT::v4i64,   { 1 } },
-    { ISD::SETCC,   MVT::v8i32,   { 1 } },
-    { ISD::SETCC,   MVT::v16i16,  { 1 } },
-    { ISD::SETCC,   MVT::v32i8,   { 1 } },
+    { ISD::SETCC,   MVT::v4i64,   { 1, 1, 1, 2 } },
+    { ISD::SETCC,   MVT::v8i32,   { 1, 1, 1, 2 } },
+    { ISD::SETCC,   MVT::v16i16,  { 1, 1, 1, 2 } },
+    { ISD::SETCC,   MVT::v32i8,   { 1, 1, 1, 2 } },
 
     { ISD::SELECT,  MVT::v4f64,   { 2, 2, 1, 2 } }, // vblendvpd
     { ISD::SELECT,  MVT::v8f32,   { 2, 2, 1, 2 } }, // vblendvps
@@ -3167,6 +3171,11 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     { ISD::SELECT,  MVT::v32i8,   { 2, 2, 1, 2 } }, // pblendvb
   };
 
+  static const CostKindTblEntry XOPCostTbl[] = {
+    { ISD::SETCC,   MVT::v4i64,   { 4, 2, 5, 6 } },
+    { ISD::SETCC,   MVT::v2i64,   { 1, 1, 1, 1 } },
+  };
+
   static const CostKindTblEntry AVX1CostTbl[] = {
     { ISD::SETCC,   MVT::v4f64,   { 2, 3, 1, 2 } },
     { ISD::SETCC,   MVT::v2f64,   { 1, 3, 1, 1 } },
@@ -3176,10 +3185,10 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     { ISD::SETCC,   MVT::f32,     { 1, 3, 1, 1 } },
 
     // AVX1 does not support 8-wide integer compare.
-    { ISD::SETCC,   MVT::v4i64,   { 4 } },
-    { ISD::SETCC,   MVT::v8i32,   { 4 } },
-    { ISD::SETCC,   MVT::v16i16,  { 4 } },
-    { ISD::SETCC,   MVT::v32i8,   { 4 } },
+    { ISD::SETCC,   MVT::v4i64,   { 4, 2, 5, 6 } },
+    { ISD::SETCC,   MVT::v8i32,   { 4, 2, 5, 6 } },
+    { ISD::SETCC,   MVT::v16i16,  { 4, 2, 5, 6 } },
+    { ISD::SETCC,   MVT::v32i8,   { 4, 2, 5, 6 } },
 
     { ISD::SELECT,  MVT::v4f64,   { 3, 3, 1, 2 } }, // vblendvpd
     { ISD::SELECT,  MVT::v8f32,   { 3, 3, 1, 2 } }, // vblendvps
@@ -3190,7 +3199,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
   };
 
   static const CostKindTblEntry SSE42CostTbl[] = {
-    { ISD::SETCC,   MVT::v2i64,   { 1 } },
+    { ISD::SETCC,   MVT::v2i64,   { 1, 2, 1, 2 } },
   };
 
   static const CostKindTblEntry SSE41CostTbl[] = {
@@ -3211,10 +3220,10 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     { ISD::SETCC,   MVT::v2f64,   { 2, 5, 1, 1 } },
     { ISD::SETCC,   MVT::f64,     { 1, 5, 1, 1 } },
 
-    { ISD::SETCC,   MVT::v2i64,   { 5 } }, // pcmpeqd/pcmpgtd expansion
-    { ISD::SETCC,   MVT::v4i32,   { 1 } },
-    { ISD::SETCC,   MVT::v8i16,   { 1 } },
-    { ISD::SETCC,   MVT::v16i8,   { 1 } },
+    { ISD::SETCC,   MVT::v2i64,   { 5, 4, 5, 5 } }, // pcmpeqd/pcmpgtd expansion
+    { ISD::SETCC,   MVT::v4i32,   { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v8i16,   { 1, 1, 1, 1 } },
+    { ISD::SETCC,   MVT::v16i8,   { 1, 1, 1, 1 } },
 
     { ISD::SELECT,  MVT::v2f64,   { 2, 2, 3, 3 } }, // andpd + andnpd + orpd
     { ISD::SELECT,  MVT::f64,     { 2, 2, 3, 3 } }, // andpd + andnpd + orpd
@@ -3252,6 +3261,11 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
       if (auto KindCost = Entry->Cost[CostKind])
         return LT.first * (ExtraCost + KindCost.value());
 
+  if (ST->hasXOP())
+    if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return LT.first * (ExtraCost + KindCost.value());
+
   if (ST->hasAVX())
     if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
       if (auto KindCost = Entry->Cost[CostKind])

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index c282c4ae7b672..10007f8e4599d 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -124,11 +124,11 @@ define i32 @sadd(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sadd'
@@ -162,11 +162,11 @@ define i32 @sadd(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'sadd'
@@ -362,11 +362,11 @@ define i32 @uadd(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'uadd'
@@ -400,11 +400,11 @@ define i32 @uadd(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'uadd'
@@ -600,11 +600,11 @@ define i32 @ssub(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'ssub'
@@ -638,11 +638,11 @@ define i32 @ssub(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'ssub'
@@ -838,11 +838,11 @@ define i32 @usub(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'usub'
@@ -876,11 +876,11 @@ define i32 @usub(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'usub'
@@ -1076,11 +1076,11 @@ define i32 @smul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'smul'
@@ -1114,11 +1114,11 @@ define i32 @smul(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'smul'
@@ -1314,11 +1314,11 @@ define i32 @umul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'umul'
@@ -1352,11 +1352,11 @@ define i32 @umul(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'umul'

diff  --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll
index 68320887960c3..3a35f85e76d12 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl.ll
@@ -180,7 +180,7 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_funnel_i16'
@@ -194,7 +194,7 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_funnel_i16'
@@ -258,7 +258,7 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_funnel_i8'
@@ -272,7 +272,7 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_funnel_i8'
@@ -496,7 +496,7 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_funnel_i16'
@@ -514,7 +514,7 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i16'
@@ -587,7 +587,7 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_funnel_i8'
@@ -605,7 +605,7 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i8'
@@ -802,7 +802,7 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_funnel_i16'
@@ -816,7 +816,7 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_funnel_i16'
@@ -880,7 +880,7 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_funnel_i8'
@@ -894,7 +894,7 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_funnel_i8'
@@ -1076,7 +1076,7 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
@@ -1090,7 +1090,7 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
@@ -1147,7 +1147,7 @@ define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
@@ -1161,7 +1161,7 @@ define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll
index dbd28ce3dfc79..a11d968d2050d 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr.ll
@@ -180,7 +180,7 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_funnel_i16'
@@ -194,7 +194,7 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_funnel_i16'
@@ -258,7 +258,7 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'var_funnel_i8'
@@ -272,7 +272,7 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'var_funnel_i8'
@@ -496,7 +496,7 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_funnel_i16'
@@ -514,7 +514,7 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i16'
@@ -587,7 +587,7 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatvar_funnel_i8'
@@ -605,7 +605,7 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatvar_funnel_i8'
@@ -802,7 +802,7 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_funnel_i16'
@@ -816,7 +816,7 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_funnel_i16'
@@ -880,7 +880,7 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'constant_funnel_i8'
@@ -894,7 +894,7 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'constant_funnel_i8'
@@ -1076,7 +1076,7 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
@@ -1090,7 +1090,7 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
@@ -1147,7 +1147,7 @@ define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
@@ -1161,7 +1161,7 @@ define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'

diff  --git a/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll b/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll
index b12339a3eab79..511ef81629c1d 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll
@@ -1,43 +1,204 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.1 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx2 | FileCheck %s
-;
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2
+;
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
 
 define i32 @cmp_int_eq(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_eq'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_eq'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_eq'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_eq'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_eq'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_eq'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_eq'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_eq'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_eq'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp eq i8 undef, undef
   %V16I8 = icmp eq <16 x i8> undef, undef
@@ -67,28 +228,189 @@ define i32 @cmp_int_eq(i32 %arg) {
 }
 
 define i32 @cmp_int_ne(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ne'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_ne'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_ne'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ne'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ne'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ne'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ne'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ne'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ne'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ne i8 undef, undef
   %V16I8 = icmp ne <16 x i8> undef, undef
@@ -118,28 +440,189 @@ define i32 @cmp_int_ne(i32 %arg) {
 }
 
 define i32 @cmp_int_sge(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_sge'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_sge'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_sge'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_sge'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_sge'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_sge'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_sge'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_sge'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_sge'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp sge i8 undef, undef
   %V16I8 = icmp sge <16 x i8> undef, undef
@@ -169,28 +652,166 @@ define i32 @cmp_int_sge(i32 %arg) {
 }
 
 define i32 @cmp_int_uge(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_uge'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE42-LABEL: 'cmp_int_uge'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_uge'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_uge'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_uge'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_uge'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_uge'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_uge'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp uge i8 undef, undef
   %V16I8 = icmp uge <16 x i8> undef, undef
@@ -220,28 +841,189 @@ define i32 @cmp_int_uge(i32 %arg) {
 }
 
 define i32 @cmp_int_sgt(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_sgt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_sgt'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_sgt'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_sgt'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_sgt'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_sgt'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_sgt'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_sgt'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_sgt'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp sgt i8 undef, undef
   %V16I8 = icmp sgt <16 x i8> undef, undef
@@ -271,28 +1053,189 @@ define i32 @cmp_int_sgt(i32 %arg) {
 }
 
 define i32 @cmp_int_ugt(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ugt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_ugt'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_ugt'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ugt'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ugt'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ugt'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ugt'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ugt'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ugt'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ugt i8 undef, undef
   %V16I8 = icmp ugt <16 x i8> undef, undef
@@ -322,28 +1265,189 @@ define i32 @cmp_int_ugt(i32 %arg) {
 }
 
 define i32 @cmp_int_sle(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_sle'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_sle'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_sle'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_sle'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_sle'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_sle'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_sle'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_sle'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_sle'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp sle i8 undef, undef
   %V16I8 = icmp sle <16 x i8> undef, undef
@@ -373,28 +1477,166 @@ define i32 @cmp_int_sle(i32 %arg) {
 }
 
 define i32 @cmp_int_ule(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ule'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE42-LABEL: 'cmp_int_ule'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ule'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ule'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ule'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ule'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ule'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ule'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ule i8 undef, undef
   %V16I8 = icmp ule <16 x i8> undef, undef
@@ -424,28 +1666,189 @@ define i32 @cmp_int_ule(i32 %arg) {
 }
 
 define i32 @cmp_int_slt(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_slt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_slt'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_slt'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_slt'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_slt'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_slt'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_slt'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_slt'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_slt'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp slt i8 undef, undef
   %V16I8 = icmp slt <16 x i8> undef, undef
@@ -475,28 +1878,189 @@ define i32 @cmp_int_slt(i32 %arg) {
 }
 
 define i32 @cmp_int_ult(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ult'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_ult'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_ult'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ult'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ult'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ult'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ult'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ult'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ult'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ult i8 undef, undef
   %V16I8 = icmp ult <16 x i8> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/X86/icmp-latency.ll b/llvm/test/Analysis/CostModel/X86/icmp-latency.ll
index e2e1499c6e316..b824480a8357c 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp-latency.ll
@@ -1,43 +1,204 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.1 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s
-;
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2
+;
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
 
 define i32 @cmp_int_eq(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_eq'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_eq'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_eq'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_eq'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_eq'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_eq'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_eq'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_eq'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_eq'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp eq i8 undef, undef
   %V16I8 = icmp eq <16 x i8> undef, undef
@@ -67,28 +228,189 @@ define i32 @cmp_int_eq(i32 %arg) {
 }
 
 define i32 @cmp_int_ne(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ne'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_ne'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_ne'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ne'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ne'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ne'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ne'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ne'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ne'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ne i8 undef, undef
   %V16I8 = icmp ne <16 x i8> undef, undef
@@ -118,28 +440,189 @@ define i32 @cmp_int_ne(i32 %arg) {
 }
 
 define i32 @cmp_int_sge(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_sge'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_sge'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_sge'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_sge'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_sge'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_sge'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_sge'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_sge'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_sge'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp sge i8 undef, undef
   %V16I8 = icmp sge <16 x i8> undef, undef
@@ -169,28 +652,166 @@ define i32 @cmp_int_sge(i32 %arg) {
 }
 
 define i32 @cmp_int_uge(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_uge'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE42-LABEL: 'cmp_int_uge'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_uge'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_uge'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_uge'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_uge'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_uge'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_uge'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp uge i8 undef, undef
   %V16I8 = icmp uge <16 x i8> undef, undef
@@ -220,28 +841,189 @@ define i32 @cmp_int_uge(i32 %arg) {
 }
 
 define i32 @cmp_int_sgt(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_sgt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_sgt'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_sgt'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_sgt'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_sgt'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_sgt'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_sgt'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_sgt'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_sgt'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp sgt i8 undef, undef
   %V16I8 = icmp sgt <16 x i8> undef, undef
@@ -271,28 +1053,189 @@ define i32 @cmp_int_sgt(i32 %arg) {
 }
 
 define i32 @cmp_int_ugt(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ugt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_ugt'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_ugt'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ugt'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ugt'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ugt'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ugt'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ugt'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ugt'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ugt i8 undef, undef
   %V16I8 = icmp ugt <16 x i8> undef, undef
@@ -322,28 +1265,189 @@ define i32 @cmp_int_ugt(i32 %arg) {
 }
 
 define i32 @cmp_int_sle(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_sle'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_sle'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_sle'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_sle'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_sle'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_sle'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_sle'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_sle'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_sle'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp sle i8 undef, undef
   %V16I8 = icmp sle <16 x i8> undef, undef
@@ -373,28 +1477,166 @@ define i32 @cmp_int_sle(i32 %arg) {
 }
 
 define i32 @cmp_int_ule(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ule'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE42-LABEL: 'cmp_int_ule'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ule'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ule'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ule'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ule'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ule'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ule'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ule i8 undef, undef
   %V16I8 = icmp ule <16 x i8> undef, undef
@@ -424,28 +1666,189 @@ define i32 @cmp_int_ule(i32 %arg) {
 }
 
 define i32 @cmp_int_slt(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_slt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_slt'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_slt'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_slt'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_slt'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_slt'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_slt'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_slt'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_slt'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp slt i8 undef, undef
   %V16I8 = icmp slt <16 x i8> undef, undef
@@ -475,28 +1878,189 @@ define i32 @cmp_int_slt(i32 %arg) {
 }
 
 define i32 @cmp_int_ult(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ult'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_ult'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_ult'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ult'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ult'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ult'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ult'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ult'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ult'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ult i8 undef, undef
   %V16I8 = icmp ult <16 x i8> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll
index 5495bec4c74a6..4414f4f1d8ebd 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll
@@ -1,43 +1,204 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.1 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx2 | FileCheck %s
-;
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=AVX1
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2
+;
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
 
 define i32 @cmp_int_eq(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_eq'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_eq'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_eq'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_eq'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_eq'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_eq'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_eq'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_eq'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_eq'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp eq i8 undef, undef
   %V16I8 = icmp eq <16 x i8> undef, undef
@@ -67,28 +228,189 @@ define i32 @cmp_int_eq(i32 %arg) {
 }
 
 define i32 @cmp_int_ne(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ne'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_ne'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_ne'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ne'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ne'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ne'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ne'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ne'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ne'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ne i8 undef, undef
   %V16I8 = icmp ne <16 x i8> undef, undef
@@ -118,28 +440,189 @@ define i32 @cmp_int_ne(i32 %arg) {
 }
 
 define i32 @cmp_int_sge(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_sge'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_sge'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_sge'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_sge'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_sge'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_sge'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_sge'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_sge'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_sge'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp sge i8 undef, undef
   %V16I8 = icmp sge <16 x i8> undef, undef
@@ -169,28 +652,166 @@ define i32 @cmp_int_sge(i32 %arg) {
 }
 
 define i32 @cmp_int_uge(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_uge'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE42-LABEL: 'cmp_int_uge'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_uge'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_uge'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_uge'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_uge'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_uge'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_uge'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp uge i8 undef, undef
   %V16I8 = icmp uge <16 x i8> undef, undef
@@ -220,28 +841,189 @@ define i32 @cmp_int_uge(i32 %arg) {
 }
 
 define i32 @cmp_int_sgt(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_sgt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_sgt'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_sgt'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_sgt'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_sgt'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_sgt'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_sgt'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_sgt'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_sgt'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp sgt i8 undef, undef
   %V16I8 = icmp sgt <16 x i8> undef, undef
@@ -271,28 +1053,189 @@ define i32 @cmp_int_sgt(i32 %arg) {
 }
 
 define i32 @cmp_int_ugt(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ugt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_ugt'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_ugt'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ugt'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ugt'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ugt'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ugt'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ugt'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ugt'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ugt i8 undef, undef
   %V16I8 = icmp ugt <16 x i8> undef, undef
@@ -322,28 +1265,189 @@ define i32 @cmp_int_ugt(i32 %arg) {
 }
 
 define i32 @cmp_int_sle(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_sle'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_sle'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_sle'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_sle'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_sle'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_sle'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_sle'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_sle'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_sle'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp sle i8 undef, undef
   %V16I8 = icmp sle <16 x i8> undef, undef
@@ -373,28 +1477,166 @@ define i32 @cmp_int_sle(i32 %arg) {
 }
 
 define i32 @cmp_int_ule(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ule'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE42-LABEL: 'cmp_int_ule'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ule'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ule'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ule'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ule'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ule'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ule'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ule i8 undef, undef
   %V16I8 = icmp ule <16 x i8> undef, undef
@@ -424,28 +1666,189 @@ define i32 @cmp_int_ule(i32 %arg) {
 }
 
 define i32 @cmp_int_slt(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_slt'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_slt'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_slt'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_slt'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_slt'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_slt'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_slt'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_slt'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_slt'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp slt i8 undef, undef
   %V16I8 = icmp slt <16 x i8> undef, undef
@@ -475,28 +1878,189 @@ define i32 @cmp_int_slt(i32 %arg) {
 }
 
 define i32 @cmp_int_ult(i32 %arg) {
-; CHECK-LABEL: 'cmp_int_ult'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE2-LABEL: 'cmp_int_ult'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'cmp_int_ult'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'cmp_int_ult'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'cmp_int_ult'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512F-LABEL: 'cmp_int_ult'
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512BW-LABEL: 'cmp_int_ult'
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX1-LABEL: 'cmp_int_ult'
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; XOPAVX2-LABEL: 'cmp_int_ult'
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I8 = icmp ult i8 undef, undef
   %V16I8 = icmp ult <16 x i8> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/X86/icmp.ll b/llvm/test/Analysis/CostModel/X86/icmp.ll
index 73afc74c60999..bac40044b783e 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp.ll
@@ -181,13 +181,13 @@ define i32 @cmp_int_eq(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
@@ -485,13 +485,13 @@ define i32 @cmp_int_ne(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
@@ -789,13 +789,13 @@ define i32 @cmp_int_sge(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
@@ -1093,13 +1093,13 @@ define i32 @cmp_int_uge(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
@@ -1397,13 +1397,13 @@ define i32 @cmp_int_sgt(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
@@ -1701,13 +1701,13 @@ define i32 @cmp_int_ugt(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
@@ -2005,13 +2005,13 @@ define i32 @cmp_int_sle(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
@@ -2309,13 +2309,13 @@ define i32 @cmp_int_ule(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
@@ -2613,13 +2613,13 @@ define i32 @cmp_int_slt(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
@@ -2917,13 +2917,13 @@ define i32 @cmp_int_ult(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index 815629ed798c7..45727a603155c 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -56,17 +56,17 @@ define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
 ;
 ; LATE-LABEL: 'umul'
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 93 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'umul'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'umul'
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 77 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
@@ -82,17 +82,17 @@ define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
 ;
 ; LATE-LABEL: 'smax'
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'smax'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'smax'
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -290,17 +290,17 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x
 ;
 ; LATE-LABEL: 'fshl'
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 145 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fshl'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 122 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'fshl'
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 146 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 149 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)

diff  --git a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
index b5ba92f1b681f..6f2394f0fa9ea 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
@@ -183,7 +183,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i16'
@@ -201,7 +201,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
@@ -271,7 +271,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i8'
@@ -291,7 +291,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2   = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
index c82dac371c810..12484d378ac1f 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
@@ -183,7 +183,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i16'
@@ -201,7 +201,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
@@ -271,7 +271,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i8'
@@ -291,7 +291,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2   = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
index 610beb4352ef5..8f79832978145 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
@@ -183,7 +183,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i16'
@@ -201,7 +201,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
@@ -271,7 +271,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i8'
@@ -291,7 +291,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2   = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
index 6215b510fb766..7948fc18a3a2f 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
@@ -183,7 +183,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i16'
@@ -201,7 +201,7 @@ define i32 @reduce_i16(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2  = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
@@ -271,7 +271,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'reduce_i8'
@@ -291,7 +291,7 @@ define i32 @reduce_i8(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %V2   = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)


        


More information about the llvm-commits mailing list