[llvm] r364299 - [TargetLowering] SimplifyDemandedBits SIGN_EXTEND_VECTOR_INREG -> ANY/ZERO_EXTEND_VECTOR_INREG
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 25 05:19:12 PDT 2019
Author: rksimon
Date: Tue Jun 25 05:19:12 2019
New Revision: 364299
URL: http://llvm.org/viewvc/llvm-project?rev=364299&view=rev
Log:
[TargetLowering] SimplifyDemandedBits SIGN_EXTEND_VECTOR_INREG -> ANY/ZERO_EXTEND_VECTOR_INREG
Simplify SIGN_EXTEND_VECTOR_INREG to ANY_EXTEND_VECTOR_INREG if the extended bits are not required, or to ZERO_EXTEND_VECTOR_INREG if the sign bit is known zero.
Matches what we already do for SIGN_EXTEND.
Reapplies rL363802, now with the legality checks added at rL364290.
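For intuition, both folds rest on the same scalar fact: a sign extension is indistinguishable from an any/zero extension when the extended bits are never observed, or when the source sign bit is known zero. A minimal standalone C++ sketch of that argument (illustrative values only, not LLVM code):

  #include <cassert>
  #include <cstdint>

  int main() {
    int16_t Src = -12345;
    // SIGN_EXTEND i16 -> i32, with a zero extend standing in for ANY_EXTEND.
    uint32_t SExt = (uint32_t)(int32_t)Src;
    uint32_t ZExt = (uint32_t)(uint16_t)Src;
    // Fold 1: if only the low 16 bits are demanded, the two agree.
    assert((SExt & 0xFFFFu) == (ZExt & 0xFFFFu));
    // Fold 2: if the sign bit is known zero, they agree on every bit.
    int16_t NonNeg = 12345;
    assert((uint32_t)(int32_t)NonNeg == (uint32_t)(uint16_t)NonNeg);
    return 0;
  }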
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/test/CodeGen/X86/pmul.ll
llvm/trunk/test/CodeGen/X86/vselect.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=364299&r1=364298&r2=364299&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Tue Jun 25 05:19:12 2019
@@ -1416,9 +1416,9 @@ bool TargetLowering::SimplifyDemandedBit
     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
 
     // If none of the top bits are demanded, convert this into an any_extend.
-    // TODO: Add SIGN_EXTEND_VECTOR_INREG - ANY_EXTEND_VECTOR_INREG fold.
-    if (DemandedBits.getActiveBits() <= InBits && !IsVecInReg) {
-      unsigned Opc = ISD::ANY_EXTEND;
+    if (DemandedBits.getActiveBits() <= InBits) {
+      unsigned Opc =
+          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
     }
@@ -1440,9 +1440,9 @@ bool TargetLowering::SimplifyDemandedBit
     Known = Known.sext(BitWidth);
 
     // If the sign bit is known zero, convert this to a zero extend.
-    // TODO: Add SIGN_EXTEND_VECTOR_INREG - ZERO_EXTEND_VECTOR_INREG fold.
-    if (Known.isNonNegative() && !IsVecInReg) {
-      unsigned Opc = ISD::ZERO_EXTEND;
+    if (Known.isNonNegative()) {
+      unsigned Opc =
+          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
     }
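The zero-extend side keys off KnownBits: after Known = Known.sext(BitWidth), a clear sign bit means the sign and zero extensions fill the top bits identically. A hedged sketch of just that query, using the same llvm::KnownBits API the patch uses (the helper name is made up for illustration, and building it needs the LLVM headers):

  #include "llvm/Support/KnownBits.h"

  // True when SIGN_EXTEND(_VECTOR_INREG) may be rewritten as
  // ZERO_EXTEND(_VECTOR_INREG): the extended value is known non-negative,
  // so both extends fill the top bits with zeros.
  static bool sextActsAsZext(llvm::KnownBits KnownSrc, unsigned BitWidth) {
    llvm::KnownBits Known = KnownSrc.sext(BitWidth);
    return Known.isNonNegative();
  }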
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=364299&r1=364298&r2=364299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Tue Jun 25 05:19:12 2019
@@ -1326,15 +1326,13 @@ define <8 x i64> @mul_v8i64_sext(<8 x i1
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovsxwq %xmm3, %xmm6
; SSE41-NEXT: pmovsxwq %xmm0, %xmm7
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE41-NEXT: pmovsxdq %xmm0, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,2,3,3]
; SSE41-NEXT: pmuldq %xmm4, %xmm3
-; SSE41-NEXT: pmovsxdq %xmm2, %xmm2
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; SSE41-NEXT: pmuldq %xmm5, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT: pmovsxdq %xmm0, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,2,3,3]
; SSE41-NEXT: pmuldq %xmm6, %xmm4
-; SSE41-NEXT: pmovsxdq %xmm1, %xmm0
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: pmuldq %xmm7, %xmm0
; SSE41-NEXT: movdqa %xmm4, %xmm1
; SSE41-NEXT: retq
@@ -1345,9 +1343,9 @@ define <8 x i64> @mul_v8i64_sext(<8 x i1
; AVX2-NEXT: vpmovsxwq %xmm2, %ymm2
; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpmovsxdq %xmm3, %ymm3
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; AVX2-NEXT: vpmuldq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm2, %ymm1
; AVX2-NEXT: retq
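These pmul.ll changes are the new fold firing: (v)pmuldq only reads the low 32 bits of each 64-bit lane, so the upper halves that pmovsxdq used to materialize are never demanded, and the cheaper pshufd/pmovzxdq forms suffice. A small SSE4.1 intrinsics sketch of that property (illustrative, assumes an x86-64 build with SSE4.1 enabled, e.g. -msse4.1):

  #include <cassert>
  #include <smmintrin.h>

  int main() {
    // Low dwords of the two 64-bit lanes are 5 and -7; the high dwords
    // differ as if produced by zero- vs. sign-extension.
    __m128i SextLike = _mm_set_epi32(-1, -7, 0, 5);
    __m128i ZextLike = _mm_set_epi32(0, -7, 0, 5);
    __m128i B = _mm_set_epi32(99, 3, 42, 2);
    // pmuldq: signed multiply of the low dword of each 64-bit lane.
    __m128i P0 = _mm_mul_epi32(SextLike, B);
    __m128i P1 = _mm_mul_epi32(ZextLike, B);
    // The high dwords were never read, so the products match exactly.
    assert(_mm_extract_epi64(P0, 0) == _mm_extract_epi64(P1, 0));
    assert(_mm_extract_epi64(P0, 1) == _mm_extract_epi64(P1, 1));
    return 0;
  }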
Modified: llvm/trunk/test/CodeGen/X86/vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect.ll?rev=364299&r1=364298&r2=364299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect.ll Tue Jun 25 05:19:12 2019
@@ -672,14 +672,23 @@ define i64 @vselect_any_extend_vector_in
; SSE41-NEXT: movq %xmm2, %rax
; SSE41-NEXT: retq
;
-; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
-; AVX: # %bb.0:
-; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpmovsxwq %xmm0, %xmm0
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX-NEXT: retq
+; AVX1-LABEL: vselect_any_extend_vector_inreg_crash:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX1-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: andl $32768, %eax # imm = 0x8000
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: vselect_any_extend_vector_inreg_crash:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: andl $32768, %eax # imm = 0x8000
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
0:
%1 = load <8 x i8>, <8 x i8>* %x
%2 = icmp eq <8 x i8> %1, <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
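The vselect.ll diff shows both outcomes of the fold on this crash reproducer: only bit 15 of the low lane is ever demanded (the final andl $32768), so on AVX1 the in-reg sign extend becomes an any extend that then folds away entirely, while on AVX2 it becomes a vpmovzxwq. A scalar restatement of that demanded-bits argument (illustrative only):

  #include <cassert>
  #include <cstdint>

  int main() {
    // A vector-compare lane is all-ones or all-zeros; widen it both ways.
    const int16_t Lanes[] = {0, -1};
    for (int16_t Lane : Lanes) {
      uint64_t SExt = (uint64_t)(int64_t)Lane;  // vpmovsxwq-style widening
      uint64_t ZExt = (uint64_t)(uint16_t)Lane; // vpmovzxwq-style widening
      // Only bit 15 survives the mask, so both widenings agree.
      assert((SExt & 0x8000u) == (ZExt & 0x8000u));
    }
    return 0;
  }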