[llvm] r326637 - [LegalizeVectorTypes] When scalarizing the operand of a unary op like TRUNC, use a SCALAR_TO_VECTOR rather than a single element BUILD_VECTOR to convert back to a vector type.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 2 15:27:50 PST 2018


Author: ctopper
Date: Fri Mar  2 15:27:50 2018
New Revision: 326637

URL: http://llvm.org/viewvc/llvm-project?rev=326637&view=rev
Log:
[LegalizeVectorTypes] When scalarizing the operand of a unary op like TRUNC, use a SCALAR_TO_VECTOR rather than a single element BUILD_VECTOR to convert back to a vector type.

X86 considers v1i1 a legal type under AVX512, and as such a truncate from a v1iX type to v1i1 can be turned into a scalar truncate plus a conversion to v1i1. We would much prefer a v1i1 SCALAR_TO_VECTOR over a one-element BUILD_VECTOR.

During lowering, we were detecting the v1i1 BUILD_VECTOR as a splat BUILD_VECTOR, as we try to do for v2i1/v4i1/etc. In this case we create (select i1 splat_elt, v1i1 all-ones, v1i1 all-zeroes). That goes through some more legalization, and we end up with a scalar CMOV choosing between 0 and 1, followed by a scalar_to_vector.

Arguably we could detect the v1i1 BUILD_VECTOR and do this better in X86 target code. But just using a SCALAR_TO_VECTOR in legalization is much easier.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=326637&r1=326636&r2=326637&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Fri Mar  2 15:27:50 2018
@@ -525,7 +525,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp
                            N->getValueType(0).getScalarType(), Elt);
   // Revectorize the result so the types line up with what the uses of this
   // expression expect.
-  return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op);
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
 }
 
 /// The vectors to concatenate have length one - use a BUILD_VECTOR instead.

Modified: llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll?rev=326637&r1=326636&r2=326637&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll Fri Mar  2 15:27:50 2018
@@ -5,18 +5,13 @@
 define void @load_v1i2_trunc_v1i1_store(<1 x i2>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i2_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    movb (%rdi), %al
-; AVX512-ALL-NEXT:    testb %al, %al
-; AVX512-ALL-NEXT:    setne %al
-; AVX512-ALL-NEXT:    kmovd %eax, %k0
+; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
 ; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i2_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
 ; AVX512-ONLY-NEXT:    movb (%rdi), %al
-; AVX512-ONLY-NEXT:    testb %al, %al
-; AVX512-ONLY-NEXT:    setne %al
 ; AVX512-ONLY-NEXT:    movb %al, (%rsi)
 ; AVX512-ONLY-NEXT:    retq
     %d0 = load <1 x i2>, <1 x i2>* %a0
@@ -27,18 +22,13 @@ define void @load_v1i2_trunc_v1i1_store(
 define void @load_v1i3_trunc_v1i1_store(<1 x i3>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i3_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    movb (%rdi), %al
-; AVX512-ALL-NEXT:    testb %al, %al
-; AVX512-ALL-NEXT:    setne %al
-; AVX512-ALL-NEXT:    kmovd %eax, %k0
+; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
 ; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i3_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
 ; AVX512-ONLY-NEXT:    movb (%rdi), %al
-; AVX512-ONLY-NEXT:    testb %al, %al
-; AVX512-ONLY-NEXT:    setne %al
 ; AVX512-ONLY-NEXT:    movb %al, (%rsi)
 ; AVX512-ONLY-NEXT:    retq
     %d0 = load <1 x i3>, <1 x i3>* %a0
@@ -49,18 +39,13 @@ define void @load_v1i3_trunc_v1i1_store(
 define void @load_v1i4_trunc_v1i1_store(<1 x i4>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i4_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    movb (%rdi), %al
-; AVX512-ALL-NEXT:    testb %al, %al
-; AVX512-ALL-NEXT:    setne %al
-; AVX512-ALL-NEXT:    kmovd %eax, %k0
+; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
 ; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i4_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
 ; AVX512-ONLY-NEXT:    movb (%rdi), %al
-; AVX512-ONLY-NEXT:    testb %al, %al
-; AVX512-ONLY-NEXT:    setne %al
 ; AVX512-ONLY-NEXT:    movb %al, (%rsi)
 ; AVX512-ONLY-NEXT:    retq
     %d0 = load <1 x i4>, <1 x i4>* %a0
@@ -71,16 +56,13 @@ define void @load_v1i4_trunc_v1i1_store(
 define void @load_v1i8_trunc_v1i1_store(<1 x i8>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i8_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    cmpb $0, (%rdi)
-; AVX512-ALL-NEXT:    setne %al
-; AVX512-ALL-NEXT:    kmovd %eax, %k0
+; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
 ; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i8_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    cmpb $0, (%rdi)
-; AVX512-ONLY-NEXT:    setne %al
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    movb %al, (%rsi)
 ; AVX512-ONLY-NEXT:    retq
     %d0 = load <1 x i8>, <1 x i8>* %a0
@@ -91,16 +73,13 @@ define void @load_v1i8_trunc_v1i1_store(
 define void @load_v1i16_trunc_v1i1_store(<1 x i16>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i16_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    cmpb $0, (%rdi)
-; AVX512-ALL-NEXT:    setne %al
-; AVX512-ALL-NEXT:    kmovd %eax, %k0
+; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
 ; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i16_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    cmpb $0, (%rdi)
-; AVX512-ONLY-NEXT:    setne %al
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    movb %al, (%rsi)
 ; AVX512-ONLY-NEXT:    retq
     %d0 = load <1 x i16>, <1 x i16>* %a0
@@ -111,16 +90,13 @@ define void @load_v1i16_trunc_v1i1_store
 define void @load_v1i32_trunc_v1i1_store(<1 x i32>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i32_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    cmpb $0, (%rdi)
-; AVX512-ALL-NEXT:    setne %al
-; AVX512-ALL-NEXT:    kmovd %eax, %k0
+; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
 ; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i32_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    cmpb $0, (%rdi)
-; AVX512-ONLY-NEXT:    setne %al
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    movb %al, (%rsi)
 ; AVX512-ONLY-NEXT:    retq
     %d0 = load <1 x i32>, <1 x i32>* %a0
@@ -131,16 +107,13 @@ define void @load_v1i32_trunc_v1i1_store
 define void @load_v1i64_trunc_v1i1_store(<1 x i64>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i64_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    cmpb $0, (%rdi)
-; AVX512-ALL-NEXT:    setne %al
-; AVX512-ALL-NEXT:    kmovd %eax, %k0
+; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
 ; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i64_trunc_v1i1_store:
 ; AVX512-ONLY:       # %bb.0:
-; AVX512-ONLY-NEXT:    cmpb $0, (%rdi)
-; AVX512-ONLY-NEXT:    setne %al
+; AVX512-ONLY-NEXT:    movb (%rdi), %al
 ; AVX512-ONLY-NEXT:    movb %al, (%rsi)
 ; AVX512-ONLY-NEXT:    retq
     %d0 = load <1 x i64>, <1 x i64>* %a0




More information about the llvm-commits mailing list