[llvm] r340143 - [SelectionDAG] Add basic demanded elements support to ComputeNumSignBits for BITCAST nodes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 19 10:47:50 PDT 2018


Author: rksimon
Date: Sun Aug 19 10:47:50 2018
New Revision: 340143

URL: http://llvm.org/viewvc/llvm-project?rev=340143&view=rev
Log:
[SelectionDAG] Add basic demanded elements support to ComputeNumSignBits for BITCAST nodes

This only adds support to the existing case of 'large element' scalar/vector to 'small element' vector bitcasts, i.e. where every bit of each demanded large source element is a sign bit.

The next step would be to support cases where the large elements aren't all sign bits, determining the small-element equivalent from the demanded elements.
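
For illustration only (not part of the patch): a minimal standalone sketch of the demanded-elements mapping the patch adds, using a plain uint64_t bitmask in place of APInt; the helper name srcDemandedElts is hypothetical.

  #include <cassert>
  #include <cstdint>

  // Hypothetical standalone model of the loop added by this patch: each
  // demanded small element i of the bitcast result demands the large
  // source element i / Scale, where Scale = SrcBits / VTBits.
  static uint64_t srcDemandedElts(uint64_t DemandedElts, unsigned NumElts,
                                  unsigned Scale) {
    uint64_t Src = 0;
    for (unsigned i = 0; i != NumElts; ++i)
      if (DemandedElts & (1ULL << i))
        Src |= 1ULL << (i / Scale);
    return Src;
  }

  int main() {
    // v2i64 -> v4i32 bitcast: Scale = 64 / 32 = 2. Demanding result
    // elements 0 and 1 demands only source element 0 ...
    assert(srcDemandedElts(0x3, 4, 2) == 0x1);
    // ... while demanding result element 2 demands only source element 1.
    assert(srcDemandedElts(0x4, 4, 2) == 0x2);
  }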

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/trunk/test/CodeGen/X86/packss.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=340143&r1=340142&r2=340143&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Sun Aug 19 10:47:50 2018
@@ -3245,7 +3245,14 @@ unsigned SelectionDAG::ComputeNumSignBit
     // Requires handling of DemandedElts and Endianness.
     if ((SrcBits % VTBits) == 0) {
       assert(Op.getValueType().isVector() && "Expected bitcast to vector");
-      Tmp = ComputeNumSignBits(N0, Depth + 1);
+
+      unsigned Scale = SrcBits / VTBits;
+      APInt SrcDemandedElts(NumElts / Scale, 0);
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i])
+          SrcDemandedElts.setBit(i / Scale);
+
+      Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
       if (Tmp == SrcBits)
         return VTBits;
     }
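
For illustration only (not part of the patch): why Tmp == SrcBits in the hunk above justifies returning VTBits even when only some result elements are demanded. If every bit of a demanded large source element is a copy of its sign bit, each small element it covers is also all sign bits, regardless of endianness. A minimal standalone check, with memcpy standing in for the BITCAST:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    // One large source element whose bits are all sign bits, e.g. the
    // result of an arithmetic shift right by 63 (as in the packss.ll test).
    int64_t Large = -1;
    int32_t Small[2];
    static_assert(sizeof(Small) == sizeof(Large), "same total width");
    std::memcpy(Small, &Large, sizeof(Large)); // stands in for the BITCAST
    // Both small lanes are all sign bits, so ComputeNumSignBits can report
    // VTBits (here 32) for whichever result elements are demanded.
    assert(Small[0] == -1 && Small[1] == -1);
  }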

Modified: llvm/trunk/test/CodeGen/X86/packss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/packss.ll?rev=340143&r1=340142&r2=340143&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/packss.ll (original)
+++ llvm/trunk/test/CodeGen/X86/packss.ll Sun Aug 19 10:47:50 2018
@@ -107,29 +107,25 @@ define <8 x i16> @trunc_ashr_v4i32_icmp_
 define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
 ; X86-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
 ; X86-SSE:       # %bb.0:
-; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
 ; X86-SSE-NEXT:    psllq $63, %xmm2
-; X86-SSE-NEXT:    movdqa %xmm0, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm3
 ; X86-SSE-NEXT:    movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
 ; X86-SSE-NEXT:    psllq $63, %xmm2
-; X86-SSE-NEXT:    movdqa %xmm1, %xmm4
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm4
 ; X86-SSE-NEXT:    movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
 ; X86-SSE-NEXT:    psrlq $63, %xmm4
-; X86-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
 ; X86-SSE-NEXT:    movapd {{.*#+}} xmm2 = [4.940656e-324,-0.000000e+00]
-; X86-SSE-NEXT:    xorpd %xmm2, %xmm1
-; X86-SSE-NEXT:    psubq %xmm2, %xmm1
-; X86-SSE-NEXT:    psrlq $63, %xmm3
-; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
 ; X86-SSE-NEXT:    xorpd %xmm2, %xmm0
 ; X86-SSE-NEXT:    psubq %xmm2, %xmm0
-; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-SSE-NEXT:    psrlq $63, %xmm3
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
+; X86-SSE-NEXT:    xorpd %xmm2, %xmm1
+; X86-SSE-NEXT:    psubq %xmm2, %xmm1
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-SSE-NEXT:    pslld $16, %xmm1
-; X86-SSE-NEXT:    psrad $16, %xmm1
-; X86-SSE-NEXT:    pslld $16, %xmm0
-; X86-SSE-NEXT:    psrad $16, %xmm0
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-SSE-NEXT:    packssdw %xmm1, %xmm0
 ; X86-SSE-NEXT:    retl
 ;
@@ -151,10 +147,7 @@ define <8 x i16> @trunc_ashr_v4i64_deman
 ; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; X86-AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
@@ -167,37 +160,33 @@ define <8 x i16> @trunc_ashr_v4i64_deman
 ; X86-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
 ; X86-AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
 ; X86-AVX2-NEXT:    vpsubq %ymm3, %ymm0, %ymm0
-; X86-AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3,16,17,16,17,16,17,16,17,16,17,16,17,16,17,18,19]
-; X86-AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X86-AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; X86-AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; X86-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
 ;
 ; X64-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X64-SSE-NEXT:    movdqa %xmm1, %xmm2
 ; X64-SSE-NEXT:    psllq $63, %xmm2
-; X64-SSE-NEXT:    movdqa %xmm0, %xmm3
+; X64-SSE-NEXT:    movdqa %xmm1, %xmm3
 ; X64-SSE-NEXT:    movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X64-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X64-SSE-NEXT:    movdqa %xmm0, %xmm2
 ; X64-SSE-NEXT:    psllq $63, %xmm2
-; X64-SSE-NEXT:    movdqa %xmm1, %xmm4
+; X64-SSE-NEXT:    movdqa %xmm0, %xmm4
 ; X64-SSE-NEXT:    movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
 ; X64-SSE-NEXT:    psrlq $63, %xmm4
-; X64-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
+; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
 ; X64-SSE-NEXT:    movapd {{.*#+}} xmm2 = [1,9223372036854775808]
-; X64-SSE-NEXT:    xorpd %xmm2, %xmm1
-; X64-SSE-NEXT:    psubq %xmm2, %xmm1
-; X64-SSE-NEXT:    psrlq $63, %xmm3
-; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
 ; X64-SSE-NEXT:    xorpd %xmm2, %xmm0
 ; X64-SSE-NEXT:    psubq %xmm2, %xmm0
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-SSE-NEXT:    psrlq $63, %xmm3
+; X64-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
+; X64-SSE-NEXT:    xorpd %xmm2, %xmm1
+; X64-SSE-NEXT:    psubq %xmm2, %xmm1
 ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-SSE-NEXT:    pslld $16, %xmm1
-; X64-SSE-NEXT:    psrad $16, %xmm1
-; X64-SSE-NEXT:    pslld $16, %xmm0
-; X64-SSE-NEXT:    psrad $16, %xmm0
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-SSE-NEXT:    packssdw %xmm1, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
@@ -220,10 +209,7 @@ define <8 x i16> @trunc_ashr_v4i64_deman
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; X64-AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
-; X64-AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
@@ -234,9 +220,9 @@ define <8 x i16> @trunc_ashr_v4i64_deman
 ; X64-AVX2-NEXT:    # ymm1 = mem[0,1,0,1]
 ; X64-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3,16,17,16,17,16,17,16,17,16,17,16,17,16,17,18,19]
-; X64-AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; X64-AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; X64-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
   %1 = shl <4 x i64> %a0, <i64 63, i64 0, i64 63, i64 0>
