[llvm] d2c093e - [CostModel][X86] Add i64 mul cost for avx512 as 1cy

Haohai Wen via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 7 19:40:21 PST 2021


Author: Haohai Wen
Date: 2021-12-08T11:29:08+08:00
New Revision: d2c093e79d146f837ce0117f9b331e9e4508d2cf

URL: https://github.com/llvm/llvm-project/commit/d2c093e79d146f837ce0117f9b331e9e4508d2cf
DIFF: https://github.com/llvm/llvm-project/commit/d2c093e79d146f837ce0117f9b331e9e4508d2cf.diff

LOG: [CostModel][X86] Add i64 mul cost for avx512 as 1cy

i64 mul cost is 1cy for all cpu that support avx512. Currently
all X86 cpu uses i64 mul cost in X64 cost table which is not
true for cpu that support avx512 (skx, icx).

Reviewed By: pengfei, RKSimon

Differential Revision: https://reviews.llvm.org/D115016

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/arith-fix.ll
    llvm/test/Analysis/CostModel/X86/arith-overflow.ll
    llvm/test/Analysis/CostModel/X86/arith.ll
    llvm/test/Analysis/CostModel/X86/mul.ll
    llvm/test/Analysis/CostModel/X86/rem.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 869762b351961..c20241c918710 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -660,6 +660,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
     { ISD::MUL,     MVT::v8i32,      1 }, // pmulld (Skylake from agner.org)
     { ISD::MUL,     MVT::v4i32,      1 }, // pmulld (Skylake from agner.org)
     { ISD::MUL,     MVT::v8i64,      6 }, // 3*pmuludq/3*shift/2*add
+    { ISD::MUL,     MVT::i64,        1 }, // Skylake from http://www.agner.org/
 
     { ISD::FNEG,    MVT::v8f64,      1 }, // Skylake from http://www.agner.org/
     { ISD::FADD,    MVT::v8f64,      1 }, // Skylake from http://www.agner.org/

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll
index 708ea0f6aa3cc..8a7552a36ca25 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll
@@ -109,11 +109,11 @@ define i32 @smul(i32 %arg) {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'smul'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
@@ -128,11 +128,11 @@ define i32 @smul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'smul'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
@@ -147,11 +147,11 @@ define i32 @smul(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'smul'
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
@@ -343,11 +343,11 @@ define i32 @umul(i32 %arg) {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'umul'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
@@ -362,11 +362,11 @@ define i32 @umul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'umul'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
@@ -381,11 +381,11 @@ define i32 @umul(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'umul'
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index 7a8b7f1aa5ca9..7d5a8763bfdee 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1066,9 +1066,9 @@ define i32 @smul(i32 %arg) {
 ;
 ; AVX512F-LABEL: 'smul'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1085,9 +1085,9 @@ define i32 @smul(i32 %arg) {
 ;
 ; AVX512BW-LABEL: 'smul'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1104,9 +1104,9 @@ define i32 @smul(i32 %arg) {
 ;
 ; AVX512DQ-LABEL: 'smul'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1304,9 +1304,9 @@ define i32 @umul(i32 %arg) {
 ;
 ; AVX512F-LABEL: 'umul'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1323,9 +1323,9 @@ define i32 @umul(i32 %arg) {
 ;
 ; AVX512BW-LABEL: 'umul'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -1342,9 +1342,9 @@ define i32 @umul(i32 %arg) {
 ;
 ; AVX512DQ-LABEL: 'umul'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 93 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith.ll
index eb7dbd0a46e72..2f1eaa7792b72 100644
--- a/llvm/test/Analysis/CostModel/X86/arith.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith.ll
@@ -1018,7 +1018,7 @@ define i32 @mul(i32 %arg) {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = mul <2 x i64> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = mul <4 x i64> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = mul <8 x i64> undef, undef
@@ -1040,7 +1040,7 @@ define i32 @mul(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = mul <2 x i64> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = mul <4 x i64> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = mul <8 x i64> undef, undef
@@ -1062,7 +1062,7 @@ define i32 @mul(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'mul'
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = mul <2 x i64> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = mul <4 x i64> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = mul <8 x i64> undef, undef

diff  --git a/llvm/test/Analysis/CostModel/X86/mul.ll b/llvm/test/Analysis/CostModel/X86/mul.ll
index 511767b2ae7c6..59c0d323118da 100644
--- a/llvm/test/Analysis/CostModel/X86/mul.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul.ll
@@ -460,7 +460,7 @@ define i32 @mul_constnegpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_constnegpow2'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
@@ -479,7 +479,7 @@ define i32 @mul_constnegpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_constnegpow2'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -8, i64 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
@@ -636,7 +636,7 @@ define i32 @mul_uniformconstnegpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstnegpow2'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
@@ -655,7 +655,7 @@ define i32 @mul_uniformconstnegpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstnegpow2'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, -16
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>

diff  --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll
index c72cb9c425bef..991430fba6b67 100644
--- a/llvm/test/Analysis/CostModel/X86/rem.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem.ll
@@ -1,35 +1,73 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
-;
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,GLM
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
+;
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,BTVER2
 
 define i32 @srem() {
-; CHECK-LABEL: 'srem'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'srem'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'srem'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'srem'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = srem <4 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = srem <8 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = srem <16 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = srem <8 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = srem <16 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = srem <32 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = srem <16 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = srem <32 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = srem <64 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, undef
   %V2i64 = srem <2 x i64> undef, undef
@@ -55,24 +93,62 @@ define i32 @srem() {
 }
 
 define i32 @urem() {
-; CHECK-LABEL: 'urem'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'urem'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'urem'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'urem'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i32 = urem <4 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i32 = urem <8 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i32 = urem <16 x i32> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i16 = urem <8 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i16 = urem <16 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i16 = urem <32 x i16> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 960 for instruction: %V16i8 = urem <16 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1920 for instruction: %V32i8 = urem <32 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3840 for instruction: %V64i8 = urem <64 x i8> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, undef
   %V2i64 = urem <2 x i64> undef, undef
@@ -194,10 +270,10 @@ define i32 @srem_const() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_const'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -213,10 +289,10 @@ define i32 @srem_const() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_const'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 6, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -370,10 +446,10 @@ define i32 @urem_const() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'urem_const'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 6, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 6, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -389,10 +465,10 @@ define i32 @urem_const() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'urem_const'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 6, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 6, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 4, i64 5, i64 6, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, <i32 4, i32 5, i32 6, i32 7>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -508,10 +584,10 @@ define i32 @srem_uniformconst() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconst'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
@@ -527,10 +603,10 @@ define i32 @srem_uniformconst() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconst'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
@@ -646,10 +722,10 @@ define i32 @urem_uniformconst() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'urem_uniformconst'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
@@ -665,10 +741,10 @@ define i32 @urem_uniformconst() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'urem_uniformconst'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
@@ -822,7 +898,7 @@ define i32 @srem_constpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_constpow2'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, <i64 8, i64 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
@@ -841,7 +917,7 @@ define i32 @srem_constpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_constpow2'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, <i64 8, i64 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, <i64 2, i64 4, i64 8, i64 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
@@ -1117,7 +1193,7 @@ define i32 @srem_uniformconstpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstpow2'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
@@ -1136,7 +1212,7 @@ define i32 @srem_uniformconstpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstpow2'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
@@ -1412,10 +1488,10 @@ define i32 @srem_constnegpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_constnegpow2'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
@@ -1431,10 +1507,10 @@ define i32 @srem_constnegpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_constnegpow2'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -8, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
@@ -1588,10 +1664,10 @@ define i32 @urem_constnegpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'urem_constnegpow2'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -8, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
@@ -1607,10 +1683,10 @@ define i32 @urem_constnegpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'urem_constnegpow2'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -8, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -8, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -2, i64 -4, i64 -8, i64 -16, i64 -32, i64 -64, i64 -128, i64 -256>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -2, i32 -4, i32 -8, i32 -16, i32 -32, i32 -64, i32 -128, i32 -256>
@@ -1726,10 +1802,10 @@ define i32 @srem_uniformconstnegpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstnegpow2'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
@@ -1745,10 +1821,10 @@ define i32 @srem_uniformconstnegpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstnegpow2'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
@@ -1864,10 +1940,10 @@ define i32 @urem_uniformconstnegpow2() {
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'urem_uniformconstnegpow2'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
@@ -1883,10 +1959,10 @@ define i32 @urem_uniformconstnegpow2() {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'urem_uniformconstnegpow2'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>


        


More information about the llvm-commits mailing list