[llvm] r340143 - [SelectionDAG] Add basic demanded elements support to ComputeNumSignBits for BITCAST nodes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 19 10:47:50 PDT 2018
Author: rksimon
Date: Sun Aug 19 10:47:50 2018
New Revision: 340143
URL: http://llvm.org/viewvc/llvm-project?rev=340143&view=rev
Log:
[SelectionDAG] Add basic demanded elements support to ComputeNumSignBits for BITCAST nodes
Only adds support for the existing 'large element' scalar/vector to 'small element' vector bitcasts.
The next step would be to support cases where the large elements aren't all sign bits, and determine the small element equivalent based on the demanded elements.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/X86/packss.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=340143&r1=340142&r2=340143&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Sun Aug 19 10:47:50 2018
@@ -3245,7 +3245,14 @@ unsigned SelectionDAG::ComputeNumSignBit
// Requires handling of DemandedElts and Endianness.
if ((SrcBits % VTBits) == 0) {
assert(Op.getValueType().isVector() && "Expected bitcast to vector");
- Tmp = ComputeNumSignBits(N0, Depth + 1);
+
+ unsigned Scale = SrcBits / VTBits;
+ APInt SrcDemandedElts(NumElts / Scale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBit(i / Scale);
+
+ Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
if (Tmp == SrcBits)
return VTBits;
}
Modified: llvm/trunk/test/CodeGen/X86/packss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/packss.ll?rev=340143&r1=340142&r2=340143&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/packss.ll (original)
+++ llvm/trunk/test/CodeGen/X86/packss.ll Sun Aug 19 10:47:50 2018
@@ -107,29 +107,25 @@ define <8 x i16> @trunc_ashr_v4i32_icmp_
define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE-NEXT: movdqa %xmm1, %xmm2
; X86-SSE-NEXT: psllq $63, %xmm2
-; X86-SSE-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE-NEXT: movdqa %xmm1, %xmm3
; X86-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X86-SSE-NEXT: movdqa %xmm1, %xmm2
+; X86-SSE-NEXT: movdqa %xmm0, %xmm2
; X86-SSE-NEXT: psllq $63, %xmm2
-; X86-SSE-NEXT: movdqa %xmm1, %xmm4
+; X86-SSE-NEXT: movdqa %xmm0, %xmm4
; X86-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
; X86-SSE-NEXT: psrlq $63, %xmm4
-; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X86-SSE-NEXT: movapd {{.*#+}} xmm2 = [4.940656e-324,-0.000000e+00]
-; X86-SSE-NEXT: xorpd %xmm2, %xmm1
-; X86-SSE-NEXT: psubq %xmm2, %xmm1
-; X86-SSE-NEXT: psrlq $63, %xmm3
-; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
; X86-SSE-NEXT: xorpd %xmm2, %xmm0
; X86-SSE-NEXT: psubq %xmm2, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-SSE-NEXT: psrlq $63, %xmm3
+; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
+; X86-SSE-NEXT: xorpd %xmm2, %xmm1
+; X86-SSE-NEXT: psubq %xmm2, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X86-SSE-NEXT: pslld $16, %xmm1
-; X86-SSE-NEXT: psrad $16, %xmm1
-; X86-SSE-NEXT: pslld $16, %xmm0
-; X86-SSE-NEXT: psrad $16, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: packssdw %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
@@ -151,10 +147,7 @@ define <8 x i16> @trunc_ashr_v4i64_deman
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; X86-AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
@@ -167,37 +160,33 @@ define <8 x i16> @trunc_ashr_v4i64_deman
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3,16,17,16,17,16,17,16,17,16,17,16,17,16,17,18,19]
-; X86-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X86-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; X86-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; X86-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movdqa %xmm0, %xmm2
+; X64-SSE-NEXT: movdqa %xmm1, %xmm2
; X64-SSE-NEXT: psllq $63, %xmm2
-; X64-SSE-NEXT: movdqa %xmm0, %xmm3
+; X64-SSE-NEXT: movdqa %xmm1, %xmm3
; X64-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X64-SSE-NEXT: movdqa %xmm1, %xmm2
+; X64-SSE-NEXT: movdqa %xmm0, %xmm2
; X64-SSE-NEXT: psllq $63, %xmm2
-; X64-SSE-NEXT: movdqa %xmm1, %xmm4
+; X64-SSE-NEXT: movdqa %xmm0, %xmm4
; X64-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
; X64-SSE-NEXT: psrlq $63, %xmm4
-; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
; X64-SSE-NEXT: movapd {{.*#+}} xmm2 = [1,9223372036854775808]
-; X64-SSE-NEXT: xorpd %xmm2, %xmm1
-; X64-SSE-NEXT: psubq %xmm2, %xmm1
-; X64-SSE-NEXT: psrlq $63, %xmm3
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
; X64-SSE-NEXT: xorpd %xmm2, %xmm0
; X64-SSE-NEXT: psubq %xmm2, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-SSE-NEXT: psrlq $63, %xmm3
+; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
+; X64-SSE-NEXT: xorpd %xmm2, %xmm1
+; X64-SSE-NEXT: psubq %xmm2, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X64-SSE-NEXT: pslld $16, %xmm1
-; X64-SSE-NEXT: psrad $16, %xmm1
-; X64-SSE-NEXT: pslld $16, %xmm0
-; X64-SSE-NEXT: psrad $16, %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: packssdw %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
@@ -220,10 +209,7 @@ define <8 x i16> @trunc_ashr_v4i64_deman
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; X64-AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; X64-AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
@@ -234,9 +220,9 @@ define <8 x i16> @trunc_ashr_v4i64_deman
; X64-AVX2-NEXT: # ymm1 = mem[0,1,0,1]
; X64-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
-; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3,16,17,16,17,16,17,16,17,16,17,16,17,16,17,18,19]
-; X64-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; X64-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; X64-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
%1 = shl <4 x i64> %a0, <i64 63, i64 0, i64 63, i64 0>
More information about the llvm-commits
mailing list