[llvm] r365057 - [X86] ComputeNumSignBitsForTargetNode - add target shuffle support.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 3 10:06:59 PDT 2019
Author: rksimon
Date: Wed Jul 3 10:06:59 2019
New Revision: 365057
URL: http://llvm.org/viewvc/llvm-project?rev=365057&view=rev
Log:
[X86] ComputeNumSignBitsForTargetNode - add target shuffle support.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=365057&r1=365056&r2=365057&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jul 3 10:06:59 2019
@@ -31271,7 +31271,8 @@ void X86TargetLowering::computeKnownBits
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
- unsigned VTBits = Op.getScalarValueSizeInBits();
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getScalarSizeInBits();
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case X86ISD::SETCC_CARRY:
@@ -31353,6 +31354,54 @@ unsigned X86TargetLowering::ComputeNumSi
}
}
+ // Handle target shuffles.
+ // TODO - use resolveTargetShuffleInputs once we can limit recursive depth.
+ if (isTargetShuffle(Opcode)) {
+ bool IsUnary;
+ SmallVector<int, 64> Mask;
+ SmallVector<SDValue, 2> Ops;
+ if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask,
+ IsUnary)) {
+ unsigned NumOps = Ops.size();
+ unsigned NumElts = VT.getVectorNumElements();
+ if (Mask.size() == NumElts) {
+ SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ int M = Mask[i];
+ if (M == SM_SentinelUndef) {
+ // For UNDEF elements, we don't know anything about the common state
+ // of the shuffle result.
+ return 1;
+ } else if (M == SM_SentinelZero) {
+ // Zero = all sign bits.
+ continue;
+ }
+ assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
+ "Shuffle index out of range");
+
+ unsigned OpIdx = (unsigned)M / NumElts;
+ unsigned EltIdx = (unsigned)M % NumElts;
+ if (Ops[OpIdx].getValueType() != VT) {
+ // TODO - handle target shuffle ops with different value types.
+ return 1;
+ }
+ DemandedOps[OpIdx].setBit(EltIdx);
+ }
+ unsigned Tmp0 = VTBits;
+ for (unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) {
+ if (!DemandedOps[i])
+ continue;
+ unsigned Tmp1 =
+ DAG.ComputeNumSignBits(Ops[i], DemandedOps[i], Depth + 1);
+ Tmp0 = std::min(Tmp0, Tmp1);
+ }
+ return Tmp0;
+ }
+ }
+ }
+
// Fallback case.
return 1;
}
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll?rev=365057&r1=365056&r2=365057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll Wed Jul 3 10:06:59 2019
@@ -1277,8 +1277,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE2-NEXT: pslld $16, %xmm0
-; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm2
@@ -1297,8 +1295,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; SSE41-NEXT: psrad $16, %xmm1
; SSE41-NEXT: pmaxsd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: pslld $16, %xmm0
-; SSE41-NEXT: psrad $16, %xmm0
; SSE41-NEXT: pmaxsd %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
@@ -1313,8 +1309,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX-NEXT: vpmaxsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpslld $16, %xmm1, %xmm1
-; AVX-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
@@ -1329,8 +1323,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX512-NEXT: vpmaxsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpslld $16, %xmm1, %xmm1
-; AVX512-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
@@ -1764,8 +1756,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE2-NEXT: pslld $24, %xmm0
-; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm2
@@ -1784,8 +1774,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; SSE41-NEXT: psrad $24, %xmm1
; SSE41-NEXT: pmaxsd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: pslld $24, %xmm0
-; SSE41-NEXT: psrad $24, %xmm0
; SSE41-NEXT: pmaxsd %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
@@ -1800,8 +1788,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX-NEXT: vpmaxsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpslld $24, %xmm1, %xmm1
-; AVX-NEXT: vpsrad $24, %xmm1, %xmm1
; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
@@ -1816,8 +1802,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX512-NEXT: vpmaxsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpslld $24, %xmm1, %xmm1
-; AVX512-NEXT: vpsrad $24, %xmm1, %xmm1
; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll?rev=365057&r1=365056&r2=365057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll Wed Jul 3 10:06:59 2019
@@ -1276,8 +1276,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE2-NEXT: pslld $16, %xmm0
-; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm2
@@ -1296,8 +1294,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; SSE41-NEXT: psrad $16, %xmm1
; SSE41-NEXT: pminsd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: pslld $16, %xmm0
-; SSE41-NEXT: psrad $16, %xmm0
; SSE41-NEXT: pminsd %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
@@ -1312,8 +1308,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX-NEXT: vpminsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpslld $16, %xmm1, %xmm1
-; AVX-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
@@ -1328,8 +1322,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX512-NEXT: vpminsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpslld $16, %xmm1, %xmm1
-; AVX512-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
@@ -1763,8 +1755,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm0, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE2-NEXT: pslld $24, %xmm0
-; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm2
@@ -1783,8 +1773,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; SSE41-NEXT: psrad $24, %xmm1
; SSE41-NEXT: pminsd %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE41-NEXT: pslld $24, %xmm0
-; SSE41-NEXT: psrad $24, %xmm0
; SSE41-NEXT: pminsd %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
@@ -1799,8 +1787,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX-NEXT: vpminsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpslld $24, %xmm1, %xmm1
-; AVX-NEXT: vpsrad $24, %xmm1, %xmm1
; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
@@ -1815,8 +1801,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX512-NEXT: vpminsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpslld $24, %xmm1, %xmm1
-; AVX512-NEXT: vpsrad $24, %xmm1, %xmm1
; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
More information about the llvm-commits
mailing list