[llvm] r351810 - [CostModel][X86] Add ICMP Predicate specific costs

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 22 04:29:38 PST 2019


Author: rksimon
Date: Tue Jan 22 04:29:38 2019
New Revision: 351810

URL: http://llvm.org/viewvc/llvm-project?rev=351810&view=rev
Log:
[CostModel][X86] Add ICMP Predicate specific costs

As a first step towards PR40376, this patch lets getCmpSelInstrCost use the predicate of the (optional) Instruction argument, when it is a CmpInst, to determine which kind of integer comparison is being performed and adjust the costs accordingly.

Differential Revision: https://reviews.llvm.org/D57013

Modified:
    llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/trunk/test/Analysis/CostModel/X86/icmp.ll

Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=351810&r1=351809&r2=351810&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Tue Jan 22 04:29:38 2019
@@ -1650,6 +1650,47 @@ int X86TTIImpl::getCmpSelInstrCost(unsig
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  unsigned ExtraCost = 0;
+  if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) {
+    // Some vector comparison predicates cost extra instructions.
+    if (MTy.isVector() &&
+        !((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) ||
+          (ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) ||
+          ST->hasBWI())) {
+      switch (cast<CmpInst>(I)->getPredicate()) {
+      case CmpInst::Predicate::ICMP_NE:
+        // xor(cmpeq(x,y),-1)
+        ExtraCost = 1;
+        break;
+      case CmpInst::Predicate::ICMP_SGE:
+      case CmpInst::Predicate::ICMP_SLE:
+        // xor(cmpgt(x,y),-1)
+        ExtraCost = 1;
+        break;
+      case CmpInst::Predicate::ICMP_ULT:
+      case CmpInst::Predicate::ICMP_UGT:
+        // cmpgt(xor(x,signbit),xor(y,signbit))
+        // xor(cmpeq(pmaxu(x,y),x),-1)
+        ExtraCost = 2;
+        break;
+      case CmpInst::Predicate::ICMP_ULE:
+      case CmpInst::Predicate::ICMP_UGE:
+        if ((ST->hasSSE41() && MTy.getScalarSizeInBits() == 32) ||
+            (ST->hasSSE2() && MTy.getScalarSizeInBits() < 32)) {
+          // cmpeq(psubus(x,y),0)
+          // cmpeq(pminu(x,y),x)
+          ExtraCost = 1;
+        } else {
+          // xor(cmpgt(xor(x,signbit),xor(y,signbit)),-1)
+          ExtraCost = 3;
+        }
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
   static const CostTblEntry AVX512BWCostTbl[] = {
     { ISD::SETCC,   MVT::v32i16,  1 },
     { ISD::SETCC,   MVT::v64i8,   1 },
@@ -1738,35 +1779,35 @@ int X86TTIImpl::getCmpSelInstrCost(unsig
 
   if (ST->hasBWI())
     if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasAVX512())
     if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasAVX2())
     if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasAVX())
     if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasSSE42())
     if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasSSE41())
     if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasSSE2())
     if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasSSE1())
     if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 }

Modified: llvm/trunk/test/Analysis/CostModel/X86/icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/icmp.ll?rev=351810&r1=351809&r2=351810&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/icmp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/icmp.ll Tue Jan 22 04:29:38 2019
@@ -322,176 +322,176 @@ define i32 @cmp_int_eq(i32 %arg) {
 define i32 @cmp_int_ne(i32 %arg) {
 ; SSE2-LABEL: 'cmp_int_ne'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_ne'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_ne'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_ne'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_ne'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'cmp_int_ne'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'cmp_int_ne'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'cmp_int_ne'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
@@ -553,47 +553,47 @@ define i32 @cmp_int_ne(i32 %arg) {
 ; XOPAVX2-LABEL: 'cmp_int_ne'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'cmp_int_ne'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = icmp ne i8 undef, undef
@@ -626,176 +626,176 @@ define i32 @cmp_int_ne(i32 %arg) {
 define i32 @cmp_int_sge(i32 %arg) {
 ; SSE2-LABEL: 'cmp_int_sge'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_sge'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_sge'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_sge'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_sge'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'cmp_int_sge'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'cmp_int_sge'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'cmp_int_sge'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
@@ -857,47 +857,47 @@ define i32 @cmp_int_sge(i32 %arg) {
 ; XOPAVX2-LABEL: 'cmp_int_sge'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'cmp_int_sge'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = icmp sge i8 undef, undef
@@ -930,176 +930,176 @@ define i32 @cmp_int_sge(i32 %arg) {
 define i32 @cmp_int_uge(i32 %arg) {
 ; SSE2-LABEL: 'cmp_int_uge'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_uge'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_uge'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_uge'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_uge'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'cmp_int_uge'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'cmp_int_uge'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'cmp_int_uge'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
@@ -1161,47 +1161,47 @@ define i32 @cmp_int_uge(i32 %arg) {
 ; XOPAVX2-LABEL: 'cmp_int_uge'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'cmp_int_uge'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = icmp uge i8 undef, undef
@@ -1538,176 +1538,176 @@ define i32 @cmp_int_sgt(i32 %arg) {
 define i32 @cmp_int_ugt(i32 %arg) {
 ; SSE2-LABEL: 'cmp_int_ugt'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_ugt'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_ugt'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_ugt'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_ugt'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'cmp_int_ugt'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'cmp_int_ugt'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'cmp_int_ugt'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
@@ -1769,47 +1769,47 @@ define i32 @cmp_int_ugt(i32 %arg) {
 ; XOPAVX2-LABEL: 'cmp_int_ugt'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'cmp_int_ugt'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = icmp ugt i8 undef, undef
@@ -1842,176 +1842,176 @@ define i32 @cmp_int_ugt(i32 %arg) {
 define i32 @cmp_int_sle(i32 %arg) {
 ; SSE2-LABEL: 'cmp_int_sle'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_sle'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_sle'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_sle'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_sle'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'cmp_int_sle'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'cmp_int_sle'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'cmp_int_sle'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
@@ -2073,47 +2073,47 @@ define i32 @cmp_int_sle(i32 %arg) {
 ; XOPAVX2-LABEL: 'cmp_int_sle'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'cmp_int_sle'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = icmp sle i8 undef, undef
@@ -2146,176 +2146,176 @@ define i32 @cmp_int_sle(i32 %arg) {
 define i32 @cmp_int_ule(i32 %arg) {
 ; SSE2-LABEL: 'cmp_int_ule'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_ule'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_ule'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_ule'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_ule'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'cmp_int_ule'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'cmp_int_ule'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'cmp_int_ule'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
@@ -2377,47 +2377,47 @@ define i32 @cmp_int_ule(i32 %arg) {
 ; XOPAVX2-LABEL: 'cmp_int_ule'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'cmp_int_ule'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = icmp ule i8 undef, undef
@@ -2754,176 +2754,176 @@ define i32 @cmp_int_slt(i32 %arg) {
 define i32 @cmp_int_ult(i32 %arg) {
 ; SSE2-LABEL: 'cmp_int_ult'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE3-LABEL: 'cmp_int_ult'
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; SSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'cmp_int_ult'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE41-LABEL: 'cmp_int_ult'
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; SSE41-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE41-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; SSE41-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'cmp_int_ult'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'cmp_int_ult'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'cmp_int_ult'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'cmp_int_ult'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
@@ -2985,47 +2985,47 @@ define i32 @cmp_int_ult(i32 %arg) {
 ; XOPAVX2-LABEL: 'cmp_int_ult'
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'cmp_int_ult'
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = icmp ult i8 undef, undef




More information about the llvm-commits mailing list