[llvm] r350059 - [X86] Use GetDemandedBits to simplify the operands of PMULDQ/PMULUDQ.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 24 11:40:20 PST 2018
Author: ctopper
Date: Mon Dec 24 11:40:20 2018
New Revision: 350059
URL: http://llvm.org/viewvc/llvm-project?rev=350059&view=rev
Log:
[X86] Use GetDemandedBits to simplify the operands of PMULDQ/PMULUDQ.
This is an alternative to what I attempted in D56057.
GetDemandedBits is a special version of SimplifyDemandedBits that allows simplifications even when the operand has other uses. GetDemandedBits only performs simplifications that let a node be bypassed entirely; it won't create new nodes or alter any of the node's other users.
I had to add support for bypassing SIGN_EXTEND_INREG to GetDemandedBits.
Based on a patch that Simon Pilgrim sent me in email.
Fixes PR40142.
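[Editor's note: a minimal standalone sketch of the "bypass-only" contract described above. This is not the LLVM API; Node, Kind, and getDemandedBits here are invented stand-ins. The point is that the query may return an existing subexpression when the demanded bits pass through a node unchanged, but it never allocates new nodes or disturbs the node's other users.]

#include <cstdint>

// Toy expression node; a stand-in for SDNode in this sketch.
enum class Kind { Value, SignExtendInReg32 };

struct Node {
  Kind K;
  Node *Op = nullptr;   // operand of SignExtendInReg32
  uint64_t Val = 0;     // payload of a leaf Value
};

// Bypass-only query: either returns an existing node whose bits match V
// under Mask, or nullptr. Crucially, it never builds a new node.
Node *getDemandedBits(Node *V, uint64_t Mask) {
  if (V->K == Kind::SignExtendInReg32) {
    // sign_extend_inreg from 32 bits only rewrites bits [32,63]; if the
    // caller demands none of them, the operand is observably identical.
    if ((Mask >> 32) == 0)
      return V->Op;
  }
  return nullptr; // no bypass available
}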
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/pmul.ll
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=350059&r1=350058&r2=350059&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Dec 24 11:40:20 2018
@@ -2118,6 +2118,15 @@ SDValue SelectionDAG::GetDemandedBits(SD
return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
break;
}
+ case ISD::SIGN_EXTEND_INREG:
+ EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT();
+ unsigned ExVTBits = ExVT.getScalarSizeInBits();
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (Mask.getActiveBits() <= ExVTBits)
+ return V.getOperand(0);
+
+ break;
}
return SDValue();
}
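[Editor's note: as a sanity check on the new SIGN_EXTEND_INREG case, here is a small standalone C++ model, not LLVM code; signExtendInReg32 is a scalar stand-in. It shows the extension is invisible when none of the extended bits are demanded, which is exactly what the Mask.getActiveBits() <= ExVTBits test above relies on.]

#include <cassert>
#include <cstdint>

// Scalar model of sign_extend_inreg of a 64-bit value from a 32-bit type.
uint64_t signExtendInReg32(uint64_t V) {
  return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(V)));
}

int main() {
  const uint64_t LowMask = 0xFFFFFFFFull; // only bits [0,31] demanded
  uint64_t X = 0x123456789ABCDEF0ull;
  // The extension only changes bits [32,63], so under LowMask the operand
  // can be read directly; that is the bypass the new case performs.
  assert((signExtendInReg32(X) & LowMask) == (X & LowMask));
  return 0;
}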
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=350059&r1=350058&r2=350059&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 24 11:40:20 2018
@@ -41212,6 +41212,15 @@ static SDValue combinePMULDQ(SDNode *N,
if (ISD::isBuildVectorAllZeros(RHS.getNode()))
return RHS;
+ // Aggressively peek through ops to get at the demanded low bits.
+ APInt DemandedMask = APInt::getLowBitsSet(64, 32);
+ SDValue DemandedLHS = DAG.GetDemandedBits(LHS, DemandedMask);
+ SDValue DemandedRHS = DAG.GetDemandedBits(RHS, DemandedMask);
+ if (DemandedLHS || DemandedRHS)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ DemandedLHS ? DemandedLHS : LHS,
+ DemandedRHS ? DemandedRHS : RHS);
+
// PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
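[Editor's note: PMULDQ/PMULUDQ read only the low 32 bits of each 64-bit element, sign- or zero-extending them to form the 64-bit product, which is why the combine demands APInt::getLowBitsSet(64, 32). A standalone scalar model of one lane, a sketch rather than LLVM or intrinsic code, makes the dead high bits concrete.]

#include <cassert>
#include <cstdint>

// One 64-bit lane of PMULDQ: sign-extend the low 32 bits of each operand
// and form the full 64-bit product.
uint64_t pmuldqLane(uint64_t A, uint64_t B) {
  return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(A)) *
                               static_cast<int64_t>(static_cast<int32_t>(B)));
}

// One 64-bit lane of PMULUDQ: the same, but zero-extend.
uint64_t pmuludqLane(uint64_t A, uint64_t B) {
  return static_cast<uint64_t>(static_cast<uint32_t>(A)) *
         static_cast<uint64_t>(static_cast<uint32_t>(B));
}

int main() {
  uint64_t A = 0xDEADBEEF00000005ull, B = 0xCAFEBABE00000007ull;
  // Garbage in the high halves never reaches the product, so the shift,
  // blend, and masking sequences removed in the tests below were dead.
  assert(pmuldqLane(A, B) == pmuldqLane(A & 0xFFFFFFFF, B & 0xFFFFFFFF));
  assert(pmuludqLane(A, B) == pmuludqLane(A & 0xFFFFFFFF, B & 0xFFFFFFFF));
  return 0;
}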
Modified: llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll?rev=350059&r1=350058&r2=350059&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll Mon Dec 24 11:40:20 2018
@@ -1823,12 +1823,6 @@ declare <16 x i16> @llvm.x86.avx2.mpsadb
define <4 x i64> @test_mm256_mul_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mul_epi32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpsllq $32, %ymm0, %ymm2
-; CHECK-NEXT: vpsrad $31, %ymm2, %ymm2
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
-; CHECK-NEXT: vpsllq $32, %ymm1, %ymm2
-; CHECK-NEXT: vpsrad $31, %ymm2, %ymm2
-; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%A = shl <4 x i64> %a0, <i64 32, i64 32, i64 32, i64 32>
@@ -1843,9 +1837,6 @@ declare <4 x i64> @llvm.x86.avx2.pmul.dq
define <4 x i64> @test_mm256_mul_epu32(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_mul_epu32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%A = and <4 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=350059&r1=350058&r2=350059&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Mon Dec 24 11:40:20 2018
@@ -1651,10 +1651,6 @@ define <8 x i64> @test_mm512_zextsi256_s
define <8 x i64> @test_mm512_mul_epi32(<8 x i64> %__A, <8 x i64> %__B) nounwind {
; CHECK-LABEL: test_mm512_mul_epi32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpsllq $32, %zmm0, %zmm0
-; CHECK-NEXT: vpsraq $32, %zmm0, %zmm0
-; CHECK-NEXT: vpsllq $32, %zmm1, %zmm1
-; CHECK-NEXT: vpsraq $32, %zmm1, %zmm1
; CHECK-NEXT: vpmuldq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%tmp = shl <8 x i64> %__A, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
@@ -1718,11 +1714,6 @@ entry:
define <8 x i64> @test_mm512_mul_epu32(<8 x i64> %__A, <8 x i64> %__B) nounwind {
; CHECK-LABEL: test_mm512_mul_epu32:
; CHECK: # %bb.0:
-; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
-; CHECK-NEXT: kmovw %eax, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%tmp = and <8 x i64> %__A, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=350059&r1=350058&r2=350059&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Mon Dec 24 11:40:20 2018
@@ -1388,29 +1388,13 @@ define <2 x i64> @pmuldq_square(<2 x i64
;
; SSE41-LABEL: pmuldq_square:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psllq $32, %xmm1
-; SSE41-NEXT: psrad $31, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; SSE41-NEXT: pmuldq %xmm1, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pmuldq %xmm0, %xmm0
; SSE41-NEXT: retq
;
-; AVX2-LABEL: pmuldq_square:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllq $32, %xmm0, %xmm1
-; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: vpmuldq %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: pmuldq_square:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0
-; AVX512-NEXT: vpsraq $32, %zmm0, %zmm0
-; AVX512-NEXT: vpmuldq %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: pmuldq_square:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmuldq %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = shl <2 x i64> %x, <i64 32, i64 32>
%2 = ashr exact <2 x i64> %1, <i64 32, i64 32>
%3 = mul nsw <2 x i64> %2, %2
@@ -1418,24 +1402,13 @@ define <2 x i64> @pmuldq_square(<2 x i64
}
define <2 x i64> @pmuludq_square(<2 x i64> %x) {
-; SSE2-LABEL: pmuludq_square:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: pmuludq %xmm0, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: pmuludq_square:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; SSE41-NEXT: pmuludq %xmm1, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: pmuludq_square:
+; SSE: # %bb.0:
+; SSE-NEXT: pmuludq %xmm0, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: pmuludq_square:
; AVX: # %bb.0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: vpmuludq %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = and <2 x i64> %x, <i64 4294967295, i64 4294967295>
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll?rev=350059&r1=350058&r2=350059&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll Mon Dec 24 11:40:20 2018
@@ -2758,23 +2758,13 @@ define i32 @test_mm_movemask_pd(<2 x dou
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
-; X86-SSE-LABEL: test_mm_mul_epu32:
-; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
-; X86-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
-; X86-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
-; X86-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
-; X86-SSE-NEXT: retl # encoding: [0xc3]
+; SSE-LABEL: test_mm_mul_epu32:
+; SSE: # %bb.0:
+; SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_epu32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
-; AVX1-NEXT: vpblendw $204, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc2,0xcc]
-; AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; AVX1-NEXT: vpblendw $204, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x0e,0xca,0xcc]
-; AVX1-NEXT: # xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
@@ -2787,16 +2777,6 @@ define <2 x i64> @test_mm_mul_epu32(<2 x
; AVX512-NEXT: # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
-;
-; X64-SSE-LABEL: test_mm_mul_epu32:
-; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
-; X64-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
-; X64-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
-; X64-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
-; X64-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
-; X64-SSE-NEXT: retq # encoding: [0xc3]
%A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295>
%B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295>
%res = mul nuw <2 x i64> %A, %B
Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll?rev=350059&r1=350058&r2=350059&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll Mon Dec 24 11:40:20 2018
@@ -832,26 +832,11 @@ declare <8 x i16> @llvm.x86.sse41.mpsadb
define <2 x i64> @test_mm_mul_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mul_epi32:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psllq $32, %xmm2
-; SSE-NEXT: psrad $31, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: psllq $32, %xmm0
-; SSE-NEXT: psrad $31, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
-; SSE-NEXT: pmuldq %xmm0, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pmuldq %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_mul_epi32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllq $32, %xmm0, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; AVX1-NEXT: vpsllq $32, %xmm1, %xmm2
-; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;