[llvm] ad8e4dd - [CostModel][X86] Add and/or/xor general cost kinds support

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 31 09:26:59 PDT 2022


Author: Simon Pilgrim
Date: 2022-08-31T17:26:05+01:00
New Revision: ad8e4dd2ade4df7946238ee9fc8d3abfe32e2edf

URL: https://github.com/llvm/llvm-project/commit/ad8e4dd2ade4df7946238ee9fc8d3abfe32e2edf
DIFF: https://github.com/llvm/llvm-project/commit/ad8e4dd2ade4df7946238ee9fc8d3abfe32e2edf.diff

LOG: [CostModel][X86] Add and/or/xor general cost kinds support

Account for double-pumping on early AVX1/AVX2 targets

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/arith-int-latency.ll
    llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
    llvm/test/Analysis/CostModel/X86/rem-latency.ll
    llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 0dab5189d4883..52f9413f49f4c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -685,6 +685,21 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
     { ISD::SUB,     MVT::v64i8,   {  3 } }, // 2*psubb + split
     { ISD::SUB,     MVT::v32i16,  {  3 } }, // 2*psubw + split
 
+    { ISD::AND,     MVT::v32i8,   {  1,  1, 1, 1 } },
+    { ISD::AND,     MVT::v16i16,  {  1,  1, 1, 1 } },
+    { ISD::AND,     MVT::v8i32,   {  1,  1, 1, 1 } },
+    { ISD::AND,     MVT::v4i64,   {  1,  1, 1, 1 } },
+
+    { ISD::OR,      MVT::v32i8,   {  1,  1, 1, 1 } },
+    { ISD::OR,      MVT::v16i16,  {  1,  1, 1, 1 } },
+    { ISD::OR,      MVT::v8i32,   {  1,  1, 1, 1 } },
+    { ISD::OR,      MVT::v4i64,   {  1,  1, 1, 1 } },
+
+    { ISD::XOR,     MVT::v32i8,   {  1,  1, 1, 1 } },
+    { ISD::XOR,     MVT::v16i16,  {  1,  1, 1, 1 } },
+    { ISD::XOR,     MVT::v8i32,   {  1,  1, 1, 1 } },
+    { ISD::XOR,     MVT::v4i64,   {  1,  1, 1, 1 } },
+
     { ISD::MUL,     MVT::v16i32,  {  1 } }, // pmulld (Skylake from agner.org)
     { ISD::MUL,     MVT::v8i32,   {  1 } }, // pmulld (Skylake from agner.org)
     { ISD::MUL,     MVT::v4i32,   {  1 } }, // pmulld (Skylake from agner.org)
@@ -905,6 +920,21 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
     { ISD::MUL,     MVT::v8i32,   {  5 } }, // BTVER2 from http://www.agner.org/
     { ISD::MUL,     MVT::v4i64,   { 12 } },
 
+    { ISD::AND,     MVT::v32i8,   {  1,  1, 1, 2 } }, // vandps
+    { ISD::AND,     MVT::v16i16,  {  1,  1, 1, 2 } }, // vandps
+    { ISD::AND,     MVT::v8i32,   {  1,  1, 1, 2 } }, // vandps
+    { ISD::AND,     MVT::v4i64,   {  1,  1, 1, 2 } }, // vandps
+
+    { ISD::OR,      MVT::v32i8,   {  1,  1, 1, 2 } }, // vorps
+    { ISD::OR,      MVT::v16i16,  {  1,  1, 1, 2 } }, // vorps
+    { ISD::OR,      MVT::v8i32,   {  1,  1, 1, 2 } }, // vorps
+    { ISD::OR,      MVT::v4i64,   {  1,  1, 1, 2 } }, // vorps
+
+    { ISD::XOR,     MVT::v32i8,   {  1,  1, 1, 2 } }, // vxorps
+    { ISD::XOR,     MVT::v16i16,  {  1,  1, 1, 2 } }, // vxorps
+    { ISD::XOR,     MVT::v8i32,   {  1,  1, 1, 2 } }, // vxorps
+    { ISD::XOR,     MVT::v4i64,   {  1,  1, 1, 2 } }, // vxorps
+
     { ISD::SUB,     MVT::v32i8,   {  4 } },
     { ISD::ADD,     MVT::v32i8,   {  4 } },
     { ISD::SUB,     MVT::v16i16,  {  4 } },
@@ -1024,6 +1054,21 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
     { ISD::SRA,  MVT::v4i32,  { 12 } }, // Shift each lane + blend.
     { ISD::SRA,  MVT::v2i64,  {  8 } }, // srl/xor/sub splat+shuffle sequence.
 
+    { ISD::AND,  MVT::v16i8,  {  1, 1, 1, 1 } }, // pand
+    { ISD::AND,  MVT::v8i16,  {  1, 1, 1, 1 } }, // pand
+    { ISD::AND,  MVT::v4i32,  {  1, 1, 1, 1 } }, // pand
+    { ISD::AND,  MVT::v2i64,  {  1, 1, 1, 1 } }, // pand
+
+    { ISD::OR,   MVT::v16i8,  {  1, 1, 1, 1 } }, // por
+    { ISD::OR,   MVT::v8i16,  {  1, 1, 1, 1 } }, // por
+    { ISD::OR,   MVT::v4i32,  {  1, 1, 1, 1 } }, // por
+    { ISD::OR,   MVT::v2i64,  {  1, 1, 1, 1 } }, // por
+
+    { ISD::XOR,  MVT::v16i8,  {  1, 1, 1, 1 } }, // pxor
+    { ISD::XOR,  MVT::v8i16,  {  1, 1, 1, 1 } }, // pxor
+    { ISD::XOR,  MVT::v4i32,  {  1, 1, 1, 1 } }, // pxor
+    { ISD::XOR,  MVT::v2i64,  {  1, 1, 1, 1 } }, // pxor
+
     { ISD::MUL,  MVT::v8i16,  {  1 } }, // pmullw
     { ISD::MUL,  MVT::v4i32,  {  6 } }, // 3*pmuludq/4*shuffle
     { ISD::MUL,  MVT::v2i64,  {  8 } }, // 3*pmuludq/3*shift/2*add

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-int-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-int-latency.ll
index 6d59a0745e223..ecc0635e0fabf 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-int-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-int-latency.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+ssse3 | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
 ;
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX
 
 define i32 @add(i32 %arg) {
 ; CHECK-LABEL: 'add'
@@ -98,31 +98,83 @@ define i32 @sub(i32 %arg) {
 }
 
 define i32 @or(i32 %arg) {
-; CHECK-LABEL: 'or'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'or'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = or <4 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = or <8 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = or <8 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = or <16 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = or <16 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = or <32 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = or <32 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = or <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = or <32 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = or <64 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'or'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = or <8 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = or <16 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = or <32 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = or <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I1 = or <64 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'or'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = or i64 undef, undef
   %V2I64 = or <2 x i64> undef, undef
@@ -156,31 +208,83 @@ define i32 @or(i32 %arg) {
 }
 
 define i32 @xor(i32 %arg) {
-; CHECK-LABEL: 'xor'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'xor'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = xor <4 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = xor <8 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = xor <8 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = xor <16 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = xor <16 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = xor <32 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = xor <32 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = xor <64 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'xor'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = xor <8 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = xor <16 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = xor <32 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I1 = xor <64 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'xor'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = xor i64 undef, undef
   %V2I64 = xor <2 x i64> undef, undef
@@ -214,31 +318,83 @@ define i32 @xor(i32 %arg) {
 }
 
 define i32 @and(i32 %arg) {
-; CHECK-LABEL: 'and'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'and'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = and <4 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = and <8 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = and <8 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = and <16 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = and <16 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = and <32 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = and <32 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = and <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = and <32 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = and <64 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'and'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = and <8 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = and <16 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = and <32 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = and <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I1 = and <64 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'and'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = and i64 undef, undef
   %V2I64 = and <2 x i64> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll
index e963fa0c2cbec..88b604495db72 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-int-sizelatency.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+ssse3 | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512
 ;
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX
 
 define i32 @add(i32 %arg) {
 ; CHECK-LABEL: 'add'
@@ -98,31 +98,83 @@ define i32 @sub(i32 %arg) {
 }
 
 define i32 @or(i32 %arg) {
-; CHECK-LABEL: 'or'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'or'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = or <4 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = or <8 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = or <8 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = or <16 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = or <16 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = or <32 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = or <32 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = or <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = or <32 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = or <64 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'or'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = or <4 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = or <8 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = or <8 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = or <16 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = or <16 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = or <32 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = or <32 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = or <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = or <32 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = or <64 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'or'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = or i64 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = or <2 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = or <4 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = or <8 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = or i32 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = or <4 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = or <8 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = or <16 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = or i16 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = or <8 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = or <16 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = or <32 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = or i8 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = or i64 undef, undef
   %V2I64 = or <2 x i64> undef, undef
@@ -156,31 +208,83 @@ define i32 @or(i32 %arg) {
 }
 
 define i32 @xor(i32 %arg) {
-; CHECK-LABEL: 'xor'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'xor'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = xor <4 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = xor <8 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = xor <8 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = xor <16 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = xor <16 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = xor <32 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = xor <32 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = xor <64 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'xor'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = xor <4 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = xor <8 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = xor <8 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = xor <16 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = xor <16 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = xor <32 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = xor <32 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = xor <64 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'xor'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = xor i64 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = xor <2 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = xor <4 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = xor <8 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = xor i32 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = xor <4 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = xor <8 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = xor <16 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = xor i16 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = xor <8 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = xor <16 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = xor <32 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = xor i8 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = xor i64 undef, undef
   %V2I64 = xor <2 x i64> undef, undef
@@ -214,31 +318,83 @@ define i32 @xor(i32 %arg) {
 }
 
 define i32 @and(i32 %arg) {
-; CHECK-LABEL: 'and'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'and'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = and <4 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = and <8 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = and <8 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = and <16 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = and <16 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = and <32 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = and <32 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = and <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = and <32 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = and <64 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'and'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = and <4 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = and <8 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = and <8 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = and <16 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = and <16 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = and <32 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = and <32 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = and <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = and <32 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = and <64 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'and'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = and i64 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = and <2 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = and <4 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = and <8 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = and i32 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = and <4 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = and <8 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = and <16 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = and i16 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = and <8 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = and <16 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = and <32 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = and i8 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = and i64 undef, undef
   %V2I64 = and <2 x i64> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index 2c86691127a13..bd0ad4c92c086 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -290,7 +290,7 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x
 ;
 ; LATE-LABEL: 'fshl'
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fshl'
@@ -300,7 +300,7 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x
 ;
 ; SIZE_LATE-LABEL: 'fshl'
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)

diff  --git a/llvm/test/Analysis/CostModel/X86/rem-latency.ll b/llvm/test/Analysis/CostModel/X86/rem-latency.ll
index c73047b9530ea..d93ecf2e9867a 100644
--- a/llvm/test/Analysis/CostModel/X86/rem-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem-latency.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
 ;
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX
 
 define i32 @srem() {
 ; CHECK-LABEL: 'srem'
@@ -313,24 +313,62 @@ define i32 @srem_constpow2() {
 }
 
 define i32 @urem_constpow2() {
-; CHECK-LABEL: 'urem_constpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'urem_constpow2'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'urem_constpow2'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'urem_constpow2'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
   %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
@@ -399,24 +437,62 @@ define i32 @srem_uniformconstpow2() {
 }
 
 define i32 @urem_uniformconstpow2() {
-; CHECK-LABEL: 'urem_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'urem_uniformconstpow2'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'urem_uniformconstpow2'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'urem_uniformconstpow2'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
   %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>

diff  --git a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
index 72cb936191d22..ca000d3e2395f 100644
--- a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
 ;
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX
 
 define i32 @srem() {
 ; CHECK-LABEL: 'srem'
@@ -313,24 +313,62 @@ define i32 @srem_constpow2() {
 }
 
 define i32 @urem_constpow2() {
-; CHECK-LABEL: 'urem_constpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'urem_constpow2'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'urem_constpow2'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'urem_constpow2'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 2, i32 4, i32 8, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64, i32 128, i32 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
   %V2i64 = urem <2 x i64> undef, <i64 8, i64 16>
@@ -399,24 +437,62 @@ define i32 @srem_uniformconstpow2() {
 }
 
 define i32 @urem_uniformconstpow2() {
-; CHECK-LABEL: 'urem_uniformconstpow2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE-LABEL: 'urem_uniformconstpow2'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'urem_uniformconstpow2'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'urem_uniformconstpow2'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
   %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>


        


More information about the llvm-commits mailing list