[llvm] 1ddac95 - [X86][SSE] Peek through MOVMSK source sign bits using SimplifyMultipleUseDemandedBits
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu May 28 05:42:37 PDT 2020
Author: Simon Pilgrim
Date: 2020-05-28T13:42:24+01:00
New Revision: 1ddac9563d7f2414e6c4302f9902ac1294966161
URL: https://github.com/llvm/llvm-project/commit/1ddac9563d7f2414e6c4302f9902ac1294966161
DIFF: https://github.com/llvm/llvm-project/commit/1ddac9563d7f2414e6c4302f9902ac1294966161.diff
LOG: [X86][SSE] Peek through MOVMSK source sign bits using SimplifyMultipleUseDemandedBits
Allows SimplifyDemandedBitsForTargetNode to peek through multi-use ops where MOVMSK only demands the sign bit of each vector element.
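To make the idea concrete, here is a minimal standalone C++ sketch (a hypothetical illustration, not part of the patch): MOVMSK reads only the MSB of each lane, and an arithmetic right shift by 31 leaves each lane's MSB unchanged, so the MOVMSK operand can bypass the shift even when the shifted value still has other users.

#include <cassert>
#include <cstdint>

// Scalar model of X86 MOVMSKPS: gather the sign (MSB) of each 32-bit lane.
static unsigned movmsk(const int32_t v[4]) {
  unsigned Mask = 0;
  for (int I = 0; I < 4; ++I)
    Mask |= (static_cast<uint32_t>(v[I]) >> 31) << I;
  return Mask;
}

int main() {
  const int32_t X[4] = {-1, 7, INT32_MIN, 42};
  int32_t Sra[4];
  for (int I = 0; I < 4; ++I)
    Sra[I] = X[I] >> 31; // models PSRAD $31: broadcasts each lane's sign bit

  // MOVMSK only demands each lane's sign bit, and PSRAD $31 preserves it,
  // so movmsk(psrad(x, 31)) == movmsk(x); the MOVMSK use can peek through
  // the shift even though the shifted value is needed elsewhere.
  assert(movmsk(Sra) == movmsk(X));
  return 0;
}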
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vec_saddo.ll
llvm/test/CodeGen/X86/vec_smulo.ll
llvm/test/CodeGen/X86/vec_ssubo.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d70b5a7f3a22..8ec958338c02 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37363,14 +37363,20 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// MOVMSK only uses the MSB from each vector element.
KnownBits KnownSrc;
- if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), DemandedElts,
- KnownSrc, TLO, Depth + 1))
+ APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
+ Depth + 1))
return true;
if (KnownSrc.One[SrcBits - 1])
Known.One.setLowBits(NumElts);
else if (KnownSrc.Zero[SrcBits - 1])
Known.Zero.setLowBits(NumElts);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
return false;
}
case X86ISD::BEXTR: {
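For context, a toy model of the multi-use peek-through (hypothetical and heavily simplified; the real interface is TargetLowering::SimplifyMultipleUseDemandedBits): when the caller demands only the sign bit, a node that provably preserves its operand's MSB can be replaced, for that one use, by its operand without rewriting the node's other users.

#include <cassert>
#include <cstdint>

// Toy DAG node: either a leaf value or an arithmetic shift right (SRA).
struct Node {
  enum Kind { Leaf, Sra } K;
  const Node *Src = nullptr; // operand for Sra
  unsigned ShAmt = 0;        // shift amount for Sra
};

// Model of the multiple-use demanded-bits idea for a 32-bit lane: an SRA by
// 31 preserves its source's MSB, so when only the sign bit is demanded we
// may return the source for this one use, leaving other users untouched.
static const Node *peekThroughForSignBit(const Node *N) {
  if (N->K == Node::Sra && N->ShAmt == 31)
    return N->Src;
  return N; // nothing simpler found; keep the original node
}

int main() {
  Node Leaf{Node::Leaf};
  Node Shift{Node::Sra, &Leaf, 31};
  // A MOVMSK-like user that demands only sign bits can read Leaf directly,
  // even though Shift may still feed other computations.
  assert(peekThroughForSignBit(&Shift) == &Leaf);
  return 0;
}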
diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll
index 5223187eee7d..dd3a733ab217 100644
--- a/llvm/test/CodeGen/X86/vec_saddo.ll
+++ b/llvm/test/CodeGen/X86/vec_saddo.ll
@@ -1078,12 +1078,13 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pslld $31, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: pcmpeqd %xmm1, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movmskps %xmm1, %eax
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movb %al, (%rdi)
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: saddo_v4i1:
@@ -1094,8 +1095,8 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovmskps %xmm1, %eax
@@ -1110,8 +1111,8 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpslld $31, %xmm0, %xmm1
-; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovmskps %xmm1, %eax
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index a3e28ae8d7ba..5fde07d1269d 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -3422,21 +3422,20 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; SSE2-NEXT: psubd %xmm2, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: pslld $31, %xmm3
-; SSE2-NEXT: psrad $31, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: movmskps %xmm3, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: pslld $31, %xmm0
+; SSE2-NEXT: movmskps %xmm0, %eax
+; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: movb %al, (%rdi)
; SSE2-NEXT: retq
;
@@ -3461,21 +3460,20 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; SSSE3-NEXT: psubd %xmm2, %xmm4
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
-; SSSE3-NEXT: pcmpeqd %xmm4, %xmm1
-; SSSE3-NEXT: pcmpeqd %xmm2, %xmm2
-; SSSE3-NEXT: pxor %xmm2, %xmm1
-; SSSE3-NEXT: movdqa %xmm0, %xmm3
-; SSSE3-NEXT: pslld $31, %xmm3
-; SSSE3-NEXT: psrad $31, %xmm3
-; SSSE3-NEXT: pcmpeqd %xmm3, %xmm0
-; SSSE3-NEXT: pxor %xmm2, %xmm0
-; SSSE3-NEXT: por %xmm1, %xmm0
-; SSSE3-NEXT: movmskps %xmm3, %eax
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: pslld $31, %xmm0
+; SSSE3-NEXT: movmskps %xmm0, %eax
+; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
+; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm2
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: movb %al, (%rdi)
; SSSE3-NEXT: retq
;
@@ -3494,17 +3492,16 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE41-NEXT: pmulld %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pslld $31, %xmm3
+; SSE41-NEXT: movmskps %xmm3, %eax
; SSE41-NEXT: psrad $31, %xmm3
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm3
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm0, %xmm3
-; SSE41-NEXT: movdqa %xmm1, %xmm2
-; SSE41-NEXT: pslld $31, %xmm2
-; SSE41-NEXT: psrad $31, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm3
+; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: por %xmm3, %xmm0
-; SSE41-NEXT: movmskps %xmm2, %eax
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: pxor %xmm3, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: movb %al, (%rdi)
; SSE41-NEXT: retq
;
@@ -3526,8 +3523,8 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpslld $31, %xmm0, %xmm3
-; AVX1-NEXT: vpsrad $31, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpsrad $31, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovmskps %xmm3, %eax
@@ -3552,8 +3549,8 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpslld $31, %xmm0, %xmm3
-; AVX2-NEXT: vpsrad $31, %xmm3, %xmm3
-; AVX2-NEXT: vpcmpeqd %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm3, %xmm4
+; AVX2-NEXT: vpcmpeqd %xmm0, %xmm4, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovmskps %xmm3, %eax
diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll
index 21a5e71036d7..8ab9367c32f8 100644
--- a/llvm/test/CodeGen/X86/vec_ssubo.ll
+++ b/llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -1088,12 +1088,13 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pslld $31, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: pcmpeqd %xmm1, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movmskps %xmm1, %eax
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movb %al, (%rdi)
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: ssubo_v4i1:
@@ -1104,8 +1105,8 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovmskps %xmm1, %eax
@@ -1120,8 +1121,8 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpslld $31, %xmm0, %xmm1
-; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovmskps %xmm1, %eax