[llvm] bf30c48 - [X86] SimplifyDemandedVectorEltsForTargetNode - simplify PMADDWD for known zero elements
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 4 06:37:46 PDT 2021
Author: Simon Pilgrim
Date: 2021-10-04T14:36:45+01:00
New Revision: bf30c48419d8d0203088ef1d84599e7b3d685a4c
URL: https://github.com/llvm/llvm-project/commit/bf30c48419d8d0203088ef1d84599e7b3d685a4c
DIFF: https://github.com/llvm/llvm-project/commit/bf30c48419d8d0203088ef1d84599e7b3d685a4c.diff
LOG: [X86] SimplifyDemandedVectorEltsForTargetNode - simplify PMADDWD for known zero elements
Noticed while investigating the regressions in D110995 - if the RHS element is already zero, then we don't need the corresponding LHS element.
Technically we could also recheck RHS once we have LHS's known zeros, but I haven't seen any missed opportunities from that yet.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/madd.ll
llvm/test/CodeGen/X86/shrink_vmul.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2ed92c4f7f20..3c71f6f8bcf4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39641,6 +39641,26 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
KnownZero = LHSZero | RHSZero;
break;
}
+ case X86ISD::VPMADDWD: {
+ APInt LHSUndef, LHSZero;
+ APInt RHSUndef, RHSZero;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, 2 * NumElts);
+
+ APInt DemandedRHSElts = DemandedSrcElts;
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHSElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+
+ // If RHS elements are known zero then we don't need the LHS equivalent.
+ APInt DemandedLHSElts = DemandedSrcElts & ~RHSZero;
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHSElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ // TODO: Multiply by zero.
+ break;
+ }
case X86ISD::PSADBW: {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll
index 08b8db7067d5..a0a3346d7e9f 100644
--- a/llvm/test/CodeGen/X86/madd.ll
+++ b/llvm/test/CodeGen/X86/madd.ll
@@ -2060,8 +2060,7 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) {
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm3, %xmm1
; SSE2-NEXT: pmuludq %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm3, %xmm3
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index 8133e660f5b8..d8e7f3358b1f 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -1080,7 +1080,7 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pxor %xmm2, %xmm2
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7]
; X86-SSE-NEXT: pmaddwd %xmm0, %xmm1
; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
@@ -1109,7 +1109,7 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
; X64-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT: pxor %xmm2, %xmm2
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X64-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7]
; X64-SSE-NEXT: pmaddwd %xmm0, %xmm1
; X64-SSE-NEXT: movq %xmm1, (%rax,%rdx,4)
; X64-SSE-NEXT: retq
@@ -1860,8 +1860,7 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl c, %edx
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: pxor %xmm1, %xmm1
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7]
; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
@@ -1881,8 +1880,7 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: movq c(%rip), %rax
; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SSE-NEXT: pxor %xmm1, %xmm1
-; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7]
; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
More information about the llvm-commits
mailing list