[llvm] 59b9e6f - [X86] Update costs for truncates from less than 128-bit vectors to vXi1 on pre-avx512 targets
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 28 11:36:13 PDT 2020
Author: Craig Topper
Date: 2020-04-28T11:35:41-07:00
New Revision: 59b9e6fe7626da03b1146b0f5ad19753cc829de8
URL: https://github.com/llvm/llvm-project/commit/59b9e6fe7626da03b1146b0f5ad19753cc829de8
DIFF: https://github.com/llvm/llvm-project/commit/59b9e6fe7626da03b1146b0f5ad19753cc829de8.diff
LOG: [X86] Update costs for truncates from less than 128-bit vectors to vXi1 on pre-avx512 targets
vXi1 types are legalized by promoting, but the narrow vectors
are legalized by widening. This results in some truncates turning
into any_extends.
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll
llvm/test/Analysis/CostModel/X86/trunc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 21c9b7566bd4..7d9fa4f8b3e2 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1439,6 +1439,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 3 }, // sext+vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, // zmm vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // zmm vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // zmm vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 }, // vpslld+vptestmd
@@ -1606,6 +1607,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 8 }, // split+2*v8i16
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, // vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // vpsllq+vptestmq
@@ -1825,6 +1827,11 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
+ // These truncates end up widening elements.
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 1 }, // PMOVXZBQ
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 1 }, // PMOVXZWQ
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 1 }, // PMOVXZBD
+
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 1 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 1 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
@@ -1915,6 +1922,14 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 },
+ // These truncates are really widening elements.
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 1 }, // PSHUFD
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // PUNPCKLWD+DQ
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // PUNPCKLBW+WD+PSHUFD
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 1 }, // PUNPCKLWD
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // PUNPCKLBW+WD
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 1 }, // PUNPCKLBW
+
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // PAND+PACKUSWB
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // PAND+PACKUSWB
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // PAND+PACKUSWB
diff --git a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll
index e8e101714bda..0639a4875909 100644
--- a/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll
+++ b/llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll
@@ -528,21 +528,21 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
@@ -555,7 +555,7 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
@@ -582,7 +582,7 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
@@ -609,7 +609,7 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; SKX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
@@ -636,7 +636,7 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; SKX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; SKX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll
index 6523e3e15dba..cf56adbf87ec 100644
--- a/llvm/test/Analysis/CostModel/X86/trunc.ll
+++ b/llvm/test/Analysis/CostModel/X86/trunc.ll
@@ -354,32 +354,86 @@ define i32 @trunc_vXi8() {
}
define i32 @trunc_vXi1() {
-; SSE-LABEL: 'trunc_vXi1'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE2-LABEL: 'trunc_vXi1'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSSE3-LABEL: 'trunc_vXi1'
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'trunc_vXi1'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'trunc_vXi1'
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
@@ -388,21 +442,21 @@ define i32 @trunc_vXi1() {
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
@@ -415,7 +469,7 @@ define i32 @trunc_vXi1() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
@@ -442,7 +496,7 @@ define i32 @trunc_vXi1() {
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
More information about the llvm-commits
mailing list