[llvm] r345182 - [X86][SSE] Add SimplifyDemandedBitsForTargetNode PMULDQ/PMULUDQ handling
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 24 12:11:29 PDT 2018
Author: rksimon
Date: Wed Oct 24 12:11:28 2018
New Revision: 345182
URL: http://llvm.org/viewvc/llvm-project?rev=345182&view=rev
Log:
[X86][SSE] Add SimplifyDemandedBitsForTargetNode PMULDQ/PMULUDQ handling
Add X86 SimplifyDemandedBitsForTargetNode and use it to simplify PMULDQ/PMULUDQ target nodes.
This enables us to repeatedly simplify the node's arguments; the previous approach, which simplified the operands directly in the combine, had to be reverted due to PR39398.
Differential Revision: https://reviews.llvm.org/D53643
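
For background on why these nodes are so amenable to demanded-bits simplification: PMULDQ/PMULUDQ read only the low 32 bits of each 64-bit vector element. Below is a standalone scalar model of one PMULUDQ lane (illustration only, not part of this patch):

// Scalar model of one PMULUDQ lane: the 64-bit result is the product of
// the low 32 bits of each 64-bit input; the upper halves are never read.
// Illustration only -- this helper is not part of the patch.
#include <cassert>
#include <cstdint>

static uint64_t pmuludq_lane(uint64_t a, uint64_t b) {
  return uint64_t(uint32_t(a)) * uint64_t(uint32_t(b));
}

int main() {
  uint64_t x = 0xDEADBEEF00000007ULL, y = 0xCAFEF00D00000003ULL;
  // Pre-zeroing the upper 32 bits of each element (what the removed
  // vpxor+vpblendd sequences in the tests below did) cannot change the
  // product.
  assert(pmuludq_lane(x, y) ==
         pmuludq_lane(x & 0xFFFFFFFFULL, y & 0xFFFFFFFFULL));
  return 0;
}

Consequently, any instruction that only changes the upper half of each input element is redundant, which is exactly what the test changes below show.
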
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/test/CodeGen/X86/combine-pmuldq.ll
llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=345182&r1=345181&r2=345182&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Oct 24 12:11:28 2018
@@ -31870,6 +31870,30 @@ bool X86TargetLowering::SimplifyDemanded
   return false;
 }
 
+bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
+    SDValue Op, const APInt &OriginalDemandedBits, KnownBits &Known,
+    TargetLoweringOpt &TLO, unsigned Depth) const {
+  unsigned Opc = Op.getOpcode();
+  switch(Opc) {
+  case X86ISD::PMULDQ:
+  case X86ISD::PMULUDQ: {
+    // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
+    KnownBits KnownOp;
+    SDValue LHS = Op.getOperand(0);
+    SDValue RHS = Op.getOperand(1);
+    APInt DemandedMask = OriginalDemandedBits & APInt::getLowBitsSet(64, 32);
+    if (SimplifyDemandedBits(LHS, DemandedMask, KnownOp, TLO, Depth + 1))
+      return true;
+    if (SimplifyDemandedBits(RHS, DemandedMask, KnownOp, TLO, Depth + 1))
+      return true;
+    break;
+  }
+  }
+
+  return TargetLowering::SimplifyDemandedBitsForTargetNode(
+      Op, OriginalDemandedBits, Known, TLO, Depth);
+}
+
 /// Check if a vector extract from a target-specific shuffle of a load can be
 /// folded into a single element load.
 /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
@@ -40362,13 +40386,9 @@ static SDValue combinePMULDQ(SDNode *N,
   if (ISD::isBuildVectorAllZeros(RHS.getNode()))
     return RHS;
 
+  // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  APInt DemandedMask(APInt::getLowBitsSet(64, 32));
-
-  // PMULQDQ/PMULUDQ only uses lower 32 bits from each vector element.
-  if (TLI.SimplifyDemandedBits(LHS, DemandedMask, DCI))
-    return SDValue(N, 0);
-  if (TLI.SimplifyDemandedBits(RHS, DemandedMask, DCI))
+  if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
     return SDValue(N, 0);
 
   return SDValue();
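
Note the shape of the new combine: instead of calling SimplifyDemandedBits on LHS and RHS directly (the approach reverted for PR39398), it demands all 64 result bits of the node itself, so the recursion reaches the SimplifyDemandedBitsForTargetNode hook above, which narrows each operand's demanded bits to the low 32. A minimal standalone sketch of that mask computation using LLVM's APInt (illustration only, not from the patch; API names as of this 2018 revision):

// Sketch of the operand-mask computation the new hook performs.
// Standalone illustration; APInt::getAllOnesValue was the 2018-era name.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  // combinePMULDQ now demands every bit of the 64-bit result...
  APInt OriginalDemandedBits = APInt::getAllOnesValue(64);
  // ...and the target hook clamps each operand's demanded bits to the
  // low 32 bits of its 64-bit element, as in the hunk above.
  APInt DemandedMask = OriginalDemandedBits & APInt::getLowBitsSet(64, 32);
  outs() << "operand mask: "
         << format_hex(DemandedMask.getZExtValue(), 10) << "\n";
  // Prints: operand mask: 0xffffffff
  return 0;
}
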
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=345182&r1=345181&r2=345182&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Oct 24 12:11:28 2018
@@ -874,6 +874,12 @@ namespace llvm {
                                                  TargetLoweringOpt &TLO,
                                                  unsigned Depth) const override;
 
+    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+                                           const APInt &DemandedBits,
+                                           KnownBits &Known,
+                                           TargetLoweringOpt &TLO,
+                                           unsigned Depth) const override;
+
     SDValue unwrapAddress(SDValue N) const override;
 
     bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
Modified: llvm/trunk/test/CodeGen/X86/combine-pmuldq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-pmuldq.ll?rev=345182&r1=345181&r2=345182&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-pmuldq.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-pmuldq.ll Wed Oct 24 12:11:28 2018
@@ -47,26 +47,10 @@ define <2 x i64> @combine_shuffle_zero_p
; SSE-NEXT: pmuludq %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: combine_shuffle_zero_pmuludq:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: combine_shuffle_zero_pmuludq:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq:
-; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX512DQVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT: retq
+; AVX-LABEL: combine_shuffle_zero_pmuludq:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %a1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%3 = bitcast <4 x i32> %1 to <2 x i64>
@@ -84,22 +68,16 @@ define <4 x i64> @combine_shuffle_zero_p
;
; AVX2-LABEL: combine_shuffle_zero_pmuludq_256:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: combine_shuffle_zero_pmuludq_256:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX512VL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq_256:
; AVX512DQVL: # %bb.0:
-; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX512DQVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
%1 = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
Modified: llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll?rev=345182&r1=345181&r2=345182&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll Wed Oct 24 12:11:28 2018
@@ -143,31 +143,31 @@ define <4 x i32> @test_urem_odd_div(<4 x
define <4 x i32> @test_urem_even_div(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_even_div:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,2,0]
-; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,3435973837,2863311531,2454267027]
-; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,2454267027]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT: psrld $1, %xmm3
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm3[3,3]
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2
-; CHECK-SSE2-NEXT: psrld $2, %xmm2
-; CHECK-SSE2-NEXT: psrld $3, %xmm1
-; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm3
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT: psrld $2, %xmm1
+; CHECK-SSE2-NEXT: psrld $3, %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [6,10,12,14]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm1[1,2]
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,3,1]
-; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm2[1,2]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
@@ -377,30 +377,30 @@ define <4 x i32> @test_urem_pow2(<4 x i3
define <4 x i32> @test_urem_one(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_one:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,2,0]
-; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,0,2863311531,2454267027]
-; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,0,2863311531,2454267027]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT: psrld $1, %xmm3
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm3[3,3]
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2
-; CHECK-SSE2-NEXT: psrld $2, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT: psrld $2, %xmm1
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[0,0]
-; CHECK-SSE2-NEXT: psrld $3, %xmm1
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm1[2,3]
-; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,1,12,14]
-; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm1[0,0]
+; CHECK-SSE2-NEXT: psrld $3, %xmm2
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[2,3]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [6,1,12,14]
+; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
-; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm2[3,3]
-; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[3,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]