[llvm] r347479 - [LegalizeVectorTypes] Have SplitVecOp_TruncateHelper fall back to SplitVecOp_UnaryOp if splitting the output type would be a legal type.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 22 14:56:52 PST 2018


Author: ctopper
Date: Thu Nov 22 14:56:52 2018
New Revision: 347479

URL: http://llvm.org/viewvc/llvm-project?rev=347479&view=rev
Log:
[LegalizeVectorTypes] Have SplitVecOp_TruncateHelper fall back to SplitVecOp_UnaryOp if splitting the output type would be a legal type.

SplitVecOp_TruncateHelper tries to introduce a multilevel truncate to avoid scalarization. But if splitting the result type would still be a legal type we don't need to do that.

The comment block at the top of the function implied that this was already implemented, but looking back through the history, it does not appear that this condition was ever actually checked.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/trunk/test/CodeGen/X86/avg.ll
    llvm/trunk/test/CodeGen/X86/avx512-trunc-widen.ll
    llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
    llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=347479&r1=347478&r2=347479&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Thu Nov 22 14:56:52 2018
@@ -2247,10 +2247,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_Tru
   unsigned InElementSize = InVT.getScalarSizeInBits();
   unsigned OutElementSize = OutVT.getScalarSizeInBits();
 
+  // Determine the split output VT. If it's legal we can just split directly.
+  EVT LoOutVT, HiOutVT;
+  std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
+  assert(LoOutVT == HiOutVT && "Unequal split?");
+
   // If the input elements are only 1/2 the width of the result elements,
   // just use the normal splitting. Our trick only work if there's room
   // to split more than once.
-  if (InElementSize <= OutElementSize * 2)
+  if (isTypeLegal(LoOutVT) ||
+      InElementSize <= OutElementSize * 2)
     return SplitVecOp_UnaryOp(N);
   SDLoc DL(N);
 

Modified: llvm/trunk/test/CodeGen/X86/avg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avg.ll?rev=347479&r1=347478&r2=347479&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avg.ll Thu Nov 22 14:56:52 2018
@@ -463,28 +463,13 @@ define void @avg_v48i8(<48 x i8>* %a, <4
 ;
 ; AVX512BW-LABEL: avg_v48i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
-; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
-; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
-; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
-; AVX512BW-NEXT:    vpaddd %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
-; AVX512BW-NEXT:    vpaddd %zmm3, %zmm1, %zmm1
-; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
-; AVX512BW-NEXT:    vpaddd %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3
-; AVX512BW-NEXT:    vpsubd %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpsubd %zmm3, %zmm1, %zmm1
-; AVX512BW-NEXT:    vpsubd %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpsrld $1, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpsrld $1, %zmm1, %zmm1
-; AVX512BW-NEXT:    vpsrld $1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT:    vpmovdw %zmm1, %ymm1
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT:    vpmovdw %zmm2, %ymm1
-; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512BW-NEXT:    vmovdqa 16(%rdi), %xmm1
+; AVX512BW-NEXT:    vmovdqa 32(%rdi), %xmm2
+; AVX512BW-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
+; AVX512BW-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
+; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpavgb 32(%rsi), %xmm2, %xmm1
 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vmovdqu %ymm0, (%rax)
 ; AVX512BW-NEXT:    vextracti32x4 $2, %zmm1, (%rax)

Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc-widen.ll?rev=347479&r1=347478&r2=347479&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc-widen.ll Thu Nov 22 14:56:52 2018
@@ -655,23 +655,12 @@ define void @usat_trunc_qw_512_mem(<8 x
 }
 
 define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
-; KNL-LABEL: usat_trunc_db_1024:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_db_1024:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; SKX-NEXT:    vpminud %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminud %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, %ymm0
-; SKX-NEXT:    retq
+; ALL-LABEL: usat_trunc_db_1024:
+; ALL:       ## %bb.0:
+; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
+; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
   %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
   %x6 = trunc <32 x i32> %x5 to <32 x i8>
@@ -679,26 +668,14 @@ define <32 x i8> @usat_trunc_db_1024(<32
 }
 
 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
-; KNL-LABEL: usat_trunc_db_1024_mem:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    vmovdqu %ymm0, (%rdi)
-; KNL-NEXT:    vzeroupper
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_db_1024_mem:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; SKX-NEXT:    vpminud %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminud %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, (%rdi)
-; SKX-NEXT:    vzeroupper
-; SKX-NEXT:    retq
+; ALL-LABEL: usat_trunc_db_1024_mem:
+; ALL:       ## %bb.0:
+; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
+; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
   %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
   %x6 = trunc <32 x i32> %x5 to <32 x i8>
@@ -732,13 +709,9 @@ define <8 x i8> @usat_trunc_wb_128(<8 x
 define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
 ; ALL-LABEL: usat_trunc_qw_1024:
 ; ALL:       ## %bb.0:
-; ALL-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; ALL-NEXT:    vpminuq %zmm2, %zmm1, %zmm1
-; ALL-NEXT:    vpminuq %zmm2, %zmm0, %zmm0
-; ALL-NEXT:    vpmovqd %zmm0, %ymm0
-; ALL-NEXT:    vpmovqd %zmm1, %ymm1
-; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT:    vpmovdw %zmm0, %ymm0
+; ALL-NEXT:    vpmovusqw %zmm0, %xmm0
+; ALL-NEXT:    vpmovusqw %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; ALL-NEXT:    retq
   %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
   %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
@@ -954,29 +927,15 @@ define void @smax_usat_trunc_qw_512_mem(
 }
 
 define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
-; KNL-LABEL: smax_usat_trunc_db_1024:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: smax_usat_trunc_db_1024:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; SKX-NEXT:    vpminsd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminsd %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, %ymm0
-; SKX-NEXT:    retq
+; ALL-LABEL: smax_usat_trunc_db_1024:
+; ALL:       ## %bb.0:
+; ALL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; ALL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
+; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
+; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
+; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -986,32 +945,17 @@ define <32 x i8> @smax_usat_trunc_db_102
 }
 
 define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
-; KNL-LABEL: smax_usat_trunc_db_1024_mem:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    vmovdqu %ymm0, (%rdi)
-; KNL-NEXT:    vzeroupper
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: smax_usat_trunc_db_1024_mem:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; SKX-NEXT:    vpminsd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminsd %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, (%rdi)
-; SKX-NEXT:    vzeroupper
-; SKX-NEXT:    retq
+; ALL-LABEL: smax_usat_trunc_db_1024_mem:
+; ALL:       ## %bb.0:
+; ALL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; ALL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
+; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
+; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
+; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>

Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc.ll?rev=347479&r1=347478&r2=347479&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc.ll Thu Nov 22 14:56:52 2018
@@ -650,23 +650,12 @@ define void @usat_trunc_qw_512_mem(<8 x
 }
 
 define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
-; KNL-LABEL: usat_trunc_db_1024:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_db_1024:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; SKX-NEXT:    vpminud %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminud %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, %ymm0
-; SKX-NEXT:    retq
+; ALL-LABEL: usat_trunc_db_1024:
+; ALL:       ## %bb.0:
+; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
+; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
   %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
   %x6 = trunc <32 x i32> %x5 to <32 x i8>
@@ -674,26 +663,14 @@ define <32 x i8> @usat_trunc_db_1024(<32
 }
 
 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
-; KNL-LABEL: usat_trunc_db_1024_mem:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    vmovdqu %ymm0, (%rdi)
-; KNL-NEXT:    vzeroupper
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: usat_trunc_db_1024_mem:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; SKX-NEXT:    vpminud %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminud %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, (%rdi)
-; SKX-NEXT:    vzeroupper
-; SKX-NEXT:    retq
+; ALL-LABEL: usat_trunc_db_1024_mem:
+; ALL:       ## %bb.0:
+; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
+; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
   %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
   %x6 = trunc <32 x i32> %x5 to <32 x i8>
@@ -726,13 +703,9 @@ define <8 x i8> @usat_trunc_wb_128(<8 x
 define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
 ; ALL-LABEL: usat_trunc_qw_1024:
 ; ALL:       ## %bb.0:
-; ALL-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; ALL-NEXT:    vpminuq %zmm2, %zmm1, %zmm1
-; ALL-NEXT:    vpminuq %zmm2, %zmm0, %zmm0
-; ALL-NEXT:    vpmovqd %zmm0, %ymm0
-; ALL-NEXT:    vpmovqd %zmm1, %ymm1
-; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT:    vpmovdw %zmm0, %ymm0
+; ALL-NEXT:    vpmovusqw %zmm0, %xmm0
+; ALL-NEXT:    vpmovusqw %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; ALL-NEXT:    retq
   %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
   %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
@@ -950,29 +923,15 @@ define void @smax_usat_trunc_qw_512_mem(
 }
 
 define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
-; KNL-LABEL: smax_usat_trunc_db_1024:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: smax_usat_trunc_db_1024:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; SKX-NEXT:    vpminsd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminsd %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, %ymm0
-; SKX-NEXT:    retq
+; ALL-LABEL: smax_usat_trunc_db_1024:
+; ALL:       ## %bb.0:
+; ALL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; ALL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
+; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
+; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
+; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -982,32 +941,17 @@ define <32 x i8> @smax_usat_trunc_db_102
 }
 
 define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
-; KNL-LABEL: smax_usat_trunc_db_1024_mem:
-; KNL:       ## %bb.0:
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; KNL-NEXT:    vpmovusdb %zmm0, %xmm0
-; KNL-NEXT:    vpmovusdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    vmovdqu %ymm0, (%rdi)
-; KNL-NEXT:    vzeroupper
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: smax_usat_trunc_db_1024_mem:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; SKX-NEXT:    vpminsd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vpminsd %zmm2, %zmm0, %zmm0
-; SKX-NEXT:    vpmovdw %zmm0, %ymm0
-; SKX-NEXT:    vpmovdw %zmm1, %ymm1
-; SKX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT:    vpmovwb %zmm0, (%rdi)
-; SKX-NEXT:    vzeroupper
-; SKX-NEXT:    retq
+; ALL-LABEL: smax_usat_trunc_db_1024_mem:
+; ALL:       ## %bb.0:
+; ALL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; ALL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
+; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
+; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
+; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>

Modified: llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll?rev=347479&r1=347478&r2=347479&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll (original)
+++ llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll Thu Nov 22 14:56:52 2018
@@ -588,12 +588,12 @@ define <16 x i16> @test_16f32toub_256(<1
 ; CHECK-LABEL: test_16f32toub_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vcvttps2dq (%rdi), %ymm1
-; CHECK-NEXT:    vpmovdw %ymm1, %xmm1
-; CHECK-NEXT:    vcvttps2dq 32(%rdi), %ymm2
-; CHECK-NEXT:    vpmovdw %ymm2, %xmm2
-; CHECK-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; CHECK-NEXT:    vpsllw $15, %ymm1, %ymm1
-; CHECK-NEXT:    vpmovw2m %ymm1, %k1
+; CHECK-NEXT:    vpslld $31, %ymm1, %ymm1
+; CHECK-NEXT:    vpmovd2m %ymm1, %k0
+; CHECK-NEXT:    vcvttps2dq 32(%rdi), %ymm1
+; CHECK-NEXT:    vpslld $31, %ymm1, %ymm1
+; CHECK-NEXT:    vpmovd2m %ymm1, %k1
+; CHECK-NEXT:    kunpckbw %k0, %k1, %k1
 ; CHECK-NEXT:    vmovdqu16 %ymm0, %ymm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %a = load <16 x float>, <16 x float>* %ptr
@@ -620,12 +620,10 @@ define <16 x i16> @test_16f32tosb_256(<1
 ; CHECK-LABEL: test_16f32tosb_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vcvttps2dq (%rdi), %ymm1
-; CHECK-NEXT:    vpmovdw %ymm1, %xmm1
-; CHECK-NEXT:    vcvttps2dq 32(%rdi), %ymm2
-; CHECK-NEXT:    vpmovdw %ymm2, %xmm2
-; CHECK-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; CHECK-NEXT:    vpsllw $15, %ymm1, %ymm1
-; CHECK-NEXT:    vpmovw2m %ymm1, %k1
+; CHECK-NEXT:    vpmovd2m %ymm1, %k0
+; CHECK-NEXT:    vcvttps2dq 32(%rdi), %ymm1
+; CHECK-NEXT:    vpmovd2m %ymm1, %k1
+; CHECK-NEXT:    kunpckbw %k0, %k1, %k1
 ; CHECK-NEXT:    vmovdqu16 %ymm0, %ymm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %a = load <16 x float>, <16 x float>* %ptr




More information about the llvm-commits mailing list