[llvm] r291746 - [AVX-512] Improve lowering of sign_extend of v4i1 to v4i32 and v2i1 to v2i64 when avx512vl is available, but not avx512dq.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 11 22:49:08 PST 2017


Author: ctopper
Date: Thu Jan 12 00:49:08 2017
New Revision: 291746

URL: http://llvm.org/viewvc/llvm-project?rev=291746&view=rev
Log:
[AVX-512] Improve lowering of sign_extend of v4i1 to v4i32 and v2i1 to v2i64 when avx512vl is available, but not avx512dq.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-cvt.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=291746&r1=291745&r2=291746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan 12 00:49:08 2017
@@ -1280,6 +1280,8 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::FP_TO_UINT,       MVT::v4i32, Legal);
       setOperationAction(ISD::ZERO_EXTEND,      MVT::v4i32, Custom);
       setOperationAction(ISD::ZERO_EXTEND,      MVT::v2i64, Custom);
+      setOperationAction(ISD::SIGN_EXTEND,      MVT::v4i32, Custom);
+      setOperationAction(ISD::SIGN_EXTEND,      MVT::v2i64, Custom);
 
       // FIXME. This commands are available on SSE/AVX2, add relevant patterns.
       setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8,  Legal);
@@ -1306,10 +1308,7 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i8, Custom);
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i16, Custom);
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
-    if (Subtarget.hasDQI()) {
-      setOperationAction(ISD::SIGN_EXTEND,        MVT::v4i32, Custom);
-      setOperationAction(ISD::SIGN_EXTEND,        MVT::v2i64, Custom);
-    }
+
     for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
       setOperationAction(ISD::FFLOOR,     VT, Legal);
       setOperationAction(ISD::FCEIL,      VT, Legal);
@@ -17392,17 +17391,20 @@ static SDValue LowerSIGN_EXTEND_AVX512(S
 
   unsigned NumElts = VT.getVectorNumElements();
 
-  if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
-    return SDValue();
-
-  if (VT.is512BitVector() && InVTElt != MVT::i1) {
+  if (VT.is512BitVector() && InVTElt != MVT::i1 &&
+      (NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) {
     if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
       return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
     return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
   }
 
-  assert (InVTElt == MVT::i1 && "Unexpected vector type");
-  MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+  if (InVTElt != MVT::i1)
+    return SDValue();
+
+  MVT ExtVT = VT;
+  if (!VT.is512BitVector() && !Subtarget.hasVLX())
+    ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+
   SDValue V;
   if (Subtarget.hasDQI()) {
     V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
@@ -17411,7 +17413,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(S
     SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
     SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
     V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
-    if (VT.is512BitVector())
+    if (ExtVT == VT)
       return V;
   }
 

Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=291746&r1=291745&r2=291746&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Thu Jan 12 00:49:08 2017
@@ -876,18 +876,18 @@ define <8 x double> @sitofp_8i8_double(<
 }
 
 define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
-; NODQ-LABEL: sitofp_16i1_double:
-; NODQ:       ## BB#0:
-; NODQ-NEXT:    vpxord %zmm2, %zmm2, %zmm2
-; NODQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
-; NODQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k2
-; NODQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; NODQ-NEXT:    vpmovqd %zmm0, %ymm0
-; NODQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
-; NODQ-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NODQ-NEXT:    vpmovqd %zmm1, %ymm1
-; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
-; NODQ-NEXT:    retq
+; NOVLDQ-LABEL: sitofp_16i1_double:
+; NOVLDQ:       ## BB#0:
+; NOVLDQ-NEXT:    vpxord %zmm2, %zmm2, %zmm2
+; NOVLDQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
+; NOVLDQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k2
+; NOVLDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; NOVLDQ-NEXT:    vpmovqd %zmm0, %ymm0
+; NOVLDQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
+; NOVLDQ-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NOVLDQ-NEXT:    vpmovqd %zmm1, %ymm1
+; NOVLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
+; NOVLDQ-NEXT:    retq
 ;
 ; VLDQ-LABEL: sitofp_16i1_double:
 ; VLDQ:       ## BB#0:
@@ -900,6 +900,18 @@ define <16 x double> @sitofp_16i1_double
 ; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
 ; VLDQ-NEXT:    retq
 ;
+; VLNODQ-LABEL: sitofp_16i1_double:
+; VLNODQ:       ## BB#0:
+; VLNODQ-NEXT:    vpxord %zmm2, %zmm2, %zmm2
+; VLNODQ-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
+; VLNODQ-NEXT:    vcmpltpd %zmm0, %zmm2, %k2
+; VLNODQ-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
+; VLNODQ-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k2} {z}
+; VLNODQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
+; VLNODQ-NEXT:    vmovdqa32 %ymm1, %ymm1 {%k1} {z}
+; VLNODQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
+; VLNODQ-NEXT:    retq
+;
 ; AVX512DQ-LABEL: sitofp_16i1_double:
 ; AVX512DQ:       ## BB#0:
 ; AVX512DQ-NEXT:    vxorpd %zmm2, %zmm2, %zmm2
@@ -918,14 +930,14 @@ define <16 x double> @sitofp_16i1_double
 }
 
 define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
-; NODQ-LABEL: sitofp_8i1_double:
-; NODQ:       ## BB#0:
-; NODQ-NEXT:    vpxord %zmm1, %zmm1, %zmm1
-; NODQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
-; NODQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NODQ-NEXT:    vpmovqd %zmm0, %ymm0
-; NODQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
-; NODQ-NEXT:    retq
+; NOVLDQ-LABEL: sitofp_8i1_double:
+; NOVLDQ:       ## BB#0:
+; NOVLDQ-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; NOVLDQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    vpmovqd %zmm0, %ymm0
+; NOVLDQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
+; NOVLDQ-NEXT:    retq
 ;
 ; VLDQ-LABEL: sitofp_8i1_double:
 ; VLDQ:       ## BB#0:
@@ -935,6 +947,15 @@ define <8 x double> @sitofp_8i1_double(<
 ; VLDQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
 ; VLDQ-NEXT:    retq
 ;
+; VLNODQ-LABEL: sitofp_8i1_double:
+; VLNODQ:       ## BB#0:
+; VLNODQ-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; VLNODQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
+; VLNODQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; VLNODQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtdq2pd %ymm0, %zmm0
+; VLNODQ-NEXT:    retq
+;
 ; AVX512DQ-LABEL: sitofp_8i1_double:
 ; AVX512DQ:       ## BB#0:
 ; AVX512DQ-NEXT:    vxorpd %zmm1, %zmm1, %zmm1
@@ -971,8 +992,8 @@ define <8 x float> @sitofp_8i1_float(<8
 ; VLNODQ:       ## BB#0:
 ; VLNODQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
 ; VLNODQ-NEXT:    vcmpltps %ymm0, %ymm1, %k1
-; VLNODQ-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; VLNODQ-NEXT:    vpmovqd %zmm0, %ymm0
+; VLNODQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; VLNODQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; VLNODQ-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; VLNODQ-NEXT:    retq
 ;
@@ -1009,40 +1030,9 @@ define <4 x float> @sitofp_4i1_float(<4
 ; VLNODQ-LABEL: sitofp_4i1_float:
 ; VLNODQ:       ## BB#0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT:    vcmpltps %xmm0, %xmm1, %k2
-; VLNODQ-NEXT:    kshiftlw $12, %k2, %k0
-; VLNODQ-NEXT:    kshiftrw $15, %k0, %k0
-; VLNODQ-NEXT:    kshiftlw $13, %k2, %k1
-; VLNODQ-NEXT:    kshiftrw $15, %k1, %k1
-; VLNODQ-NEXT:    kshiftlw $15, %k2, %k3
-; VLNODQ-NEXT:    kshiftrw $15, %k3, %k3
-; VLNODQ-NEXT:    kshiftlw $14, %k2, %k2
-; VLNODQ-NEXT:    kshiftrw $15, %k2, %k2
-; VLNODQ-NEXT:    kmovw %k2, %eax
-; VLNODQ-NEXT:    andl $1, %eax
-; VLNODQ-NEXT:    xorl %ecx, %ecx
-; VLNODQ-NEXT:    testb %al, %al
-; VLNODQ-NEXT:    movl $-1, %eax
-; VLNODQ-NEXT:    movl $0, %edx
-; VLNODQ-NEXT:    cmovnel %eax, %edx
-; VLNODQ-NEXT:    kmovw %k3, %esi
-; VLNODQ-NEXT:    andl $1, %esi
-; VLNODQ-NEXT:    testb %sil, %sil
-; VLNODQ-NEXT:    movl $0, %esi
-; VLNODQ-NEXT:    cmovnel %eax, %esi
-; VLNODQ-NEXT:    vmovd %esi, %xmm0
-; VLNODQ-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
-; VLNODQ-NEXT:    kmovw %k1, %edx
-; VLNODQ-NEXT:    andl $1, %edx
-; VLNODQ-NEXT:    testb %dl, %dl
-; VLNODQ-NEXT:    movl $0, %edx
-; VLNODQ-NEXT:    cmovnel %eax, %edx
-; VLNODQ-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
-; VLNODQ-NEXT:    kmovw %k0, %edx
-; VLNODQ-NEXT:    andl $1, %edx
-; VLNODQ-NEXT:    testb %dl, %dl
-; VLNODQ-NEXT:    cmovnel %eax, %ecx
-; VLNODQ-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
+; VLNODQ-NEXT:    vcmpltps %xmm0, %xmm1, %k1
+; VLNODQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; VLNODQ-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; VLNODQ-NEXT:    retq
   %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
@@ -1070,40 +1060,9 @@ define <4 x double> @sitofp_4i1_double(<
 ; VLNODQ-LABEL: sitofp_4i1_double:
 ; VLNODQ:       ## BB#0:
 ; VLNODQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
-; VLNODQ-NEXT:    vcmpltpd %ymm0, %ymm1, %k2
-; VLNODQ-NEXT:    kshiftlw $12, %k2, %k0
-; VLNODQ-NEXT:    kshiftrw $15, %k0, %k0
-; VLNODQ-NEXT:    kshiftlw $13, %k2, %k1
-; VLNODQ-NEXT:    kshiftrw $15, %k1, %k1
-; VLNODQ-NEXT:    kshiftlw $15, %k2, %k3
-; VLNODQ-NEXT:    kshiftrw $15, %k3, %k3
-; VLNODQ-NEXT:    kshiftlw $14, %k2, %k2
-; VLNODQ-NEXT:    kshiftrw $15, %k2, %k2
-; VLNODQ-NEXT:    kmovw %k2, %eax
-; VLNODQ-NEXT:    andl $1, %eax
-; VLNODQ-NEXT:    xorl %ecx, %ecx
-; VLNODQ-NEXT:    testb %al, %al
-; VLNODQ-NEXT:    movl $-1, %eax
-; VLNODQ-NEXT:    movl $0, %edx
-; VLNODQ-NEXT:    cmovnel %eax, %edx
-; VLNODQ-NEXT:    kmovw %k3, %esi
-; VLNODQ-NEXT:    andl $1, %esi
-; VLNODQ-NEXT:    testb %sil, %sil
-; VLNODQ-NEXT:    movl $0, %esi
-; VLNODQ-NEXT:    cmovnel %eax, %esi
-; VLNODQ-NEXT:    vmovd %esi, %xmm0
-; VLNODQ-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
-; VLNODQ-NEXT:    kmovw %k1, %edx
-; VLNODQ-NEXT:    andl $1, %edx
-; VLNODQ-NEXT:    testb %dl, %dl
-; VLNODQ-NEXT:    movl $0, %edx
-; VLNODQ-NEXT:    cmovnel %eax, %edx
-; VLNODQ-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
-; VLNODQ-NEXT:    kmovw %k0, %edx
-; VLNODQ-NEXT:    andl $1, %edx
-; VLNODQ-NEXT:    testb %dl, %dl
-; VLNODQ-NEXT:    cmovnel %eax, %ecx
-; VLNODQ-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
+; VLNODQ-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
+; VLNODQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; VLNODQ-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; VLNODQ-NEXT:    retq
   %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
@@ -1131,40 +1090,9 @@ define <2 x float> @sitofp_2i1_float(<2
 ; VLNODQ-LABEL: sitofp_2i1_float:
 ; VLNODQ:       ## BB#0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT:    vcmpltps %xmm0, %xmm1, %k2
-; VLNODQ-NEXT:    kshiftlw $12, %k2, %k0
-; VLNODQ-NEXT:    kshiftrw $15, %k0, %k0
-; VLNODQ-NEXT:    kshiftlw $13, %k2, %k1
-; VLNODQ-NEXT:    kshiftrw $15, %k1, %k1
-; VLNODQ-NEXT:    kshiftlw $15, %k2, %k3
-; VLNODQ-NEXT:    kshiftrw $15, %k3, %k3
-; VLNODQ-NEXT:    kshiftlw $14, %k2, %k2
-; VLNODQ-NEXT:    kshiftrw $15, %k2, %k2
-; VLNODQ-NEXT:    kmovw %k2, %eax
-; VLNODQ-NEXT:    andl $1, %eax
-; VLNODQ-NEXT:    xorl %ecx, %ecx
-; VLNODQ-NEXT:    testb %al, %al
-; VLNODQ-NEXT:    movl $-1, %eax
-; VLNODQ-NEXT:    movl $0, %edx
-; VLNODQ-NEXT:    cmovnel %eax, %edx
-; VLNODQ-NEXT:    kmovw %k3, %esi
-; VLNODQ-NEXT:    andl $1, %esi
-; VLNODQ-NEXT:    testb %sil, %sil
-; VLNODQ-NEXT:    movl $0, %esi
-; VLNODQ-NEXT:    cmovnel %eax, %esi
-; VLNODQ-NEXT:    vmovd %esi, %xmm0
-; VLNODQ-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
-; VLNODQ-NEXT:    kmovw %k1, %edx
-; VLNODQ-NEXT:    andl $1, %edx
-; VLNODQ-NEXT:    testb %dl, %dl
-; VLNODQ-NEXT:    movl $0, %edx
-; VLNODQ-NEXT:    cmovnel %eax, %edx
-; VLNODQ-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
-; VLNODQ-NEXT:    kmovw %k0, %edx
-; VLNODQ-NEXT:    andl $1, %edx
-; VLNODQ-NEXT:    testb %dl, %dl
-; VLNODQ-NEXT:    cmovnel %eax, %ecx
-; VLNODQ-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
+; VLNODQ-NEXT:    vcmpltps %xmm0, %xmm1, %k1
+; VLNODQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; VLNODQ-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; VLNODQ-NEXT:    retq
   %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
@@ -1192,25 +1120,14 @@ define <2 x double> @sitofp_2i1_double(<
 ; VLNODQ-LABEL: sitofp_2i1_double:
 ; VLNODQ:       ## BB#0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
-; VLNODQ-NEXT:    kshiftlw $15, %k0, %k1
-; VLNODQ-NEXT:    kshiftrw $15, %k1, %k1
-; VLNODQ-NEXT:    kshiftlw $14, %k0, %k0
-; VLNODQ-NEXT:    kshiftrw $15, %k0, %k0
-; VLNODQ-NEXT:    kmovw %k0, %eax
-; VLNODQ-NEXT:    andl $1, %eax
-; VLNODQ-NEXT:    xorl %ecx, %ecx
-; VLNODQ-NEXT:    testb %al, %al
-; VLNODQ-NEXT:    movl $-1, %eax
-; VLNODQ-NEXT:    movl $0, %edx
-; VLNODQ-NEXT:    cmovnel %eax, %edx
-; VLNODQ-NEXT:    vcvtsi2sdl %edx, %xmm2, %xmm0
-; VLNODQ-NEXT:    kmovw %k1, %edx
-; VLNODQ-NEXT:    andl $1, %edx
-; VLNODQ-NEXT:    testb %dl, %dl
-; VLNODQ-NEXT:    cmovnel %eax, %ecx
-; VLNODQ-NEXT:    vcvtsi2sdl %ecx, %xmm2, %xmm1
-; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; VLNODQ-NEXT:    vcmpltpd %xmm0, %xmm1, %k1
+; VLNODQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; VLNODQ-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm1
+; VLNODQ-NEXT:    vmovq %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VLNODQ-NEXT:    retq
   %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
   %1 = sitofp <2 x i1> %cmpres to <2 x double>




More information about the llvm-commits mailing list