[llvm] 05443ad - [X86] isHorizontalBinOp - always create HADD/SUB if it will be merged with another existing HADD/SUB
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 02:55:19 PDT 2024
Author: Simon Pilgrim
Date: 2024-05-28T10:55:05+01:00
New Revision: 05443aded7b2fa43af01bc0cfab024277855ca30
URL: https://github.com/llvm/llvm-project/commit/05443aded7b2fa43af01bc0cfab024277855ca30
DIFF: https://github.com/llvm/llvm-project/commit/05443aded7b2fa43af01bc0cfab024277855ca30.diff
LOG: [X86] isHorizontalBinOp - always create HADD/SUB if it will be merged with another existing HADD/SUB
Fixes some more cases from #34072 where undemanded vector elements prevent HADD/SUB from being matched on slow targets.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/haddsub-undef.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7df8ffb7d0396..2d8343ffa1a0b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51571,7 +51571,8 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
SelectionDAG &DAG, const X86Subtarget &Subtarget,
bool IsCommutative,
- SmallVectorImpl<int> &PostShuffleMask) {
+ SmallVectorImpl<int> &PostShuffleMask,
+ bool ForceHorizOp) {
// If either operand is undef, bail out. The binop should be simplified.
if (LHS.isUndef() || RHS.isUndef())
return false;
@@ -51734,13 +51735,12 @@ static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
// If the source nodes are already used in HorizOps then always accept this.
// Shuffle folding should merge these back together.
- bool FoundHorizLHS = llvm::any_of(NewLHS->uses(), [&](SDNode *User) {
+ auto FoundHorizUser = [&](SDNode *User) {
return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
- });
- bool FoundHorizRHS = llvm::any_of(NewRHS->uses(), [&](SDNode *User) {
- return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
- });
- bool ForceHorizOp = FoundHorizLHS && FoundHorizRHS;
+ };
+ ForceHorizOp =
+ ForceHorizOp || (llvm::any_of(NewLHS->uses(), FoundHorizUser) &&
+ llvm::any_of(NewRHS->uses(), FoundHorizUser));
// Assume a SingleSource HOP if we only shuffle one input and don't need to
// shuffle the result.
@@ -51763,6 +51763,13 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
bool IsAdd = (Opcode == ISD::FADD) || (Opcode == ISD::ADD);
SmallVector<int, 8> PostShuffleMask;
+ auto MergableHorizOp = [N](unsigned HorizOpcode) {
+ return N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ (N->use_begin()->getOperand(0).getOpcode() == HorizOpcode ||
+ N->use_begin()->getOperand(1).getOpcode() == HorizOpcode);
+ };
+
switch (Opcode) {
case ISD::FADD:
case ISD::FSUB:
@@ -51772,7 +51779,7 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
auto HorizOpcode = IsAdd ? X86ISD::FHADD : X86ISD::FHSUB;
if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
- PostShuffleMask)) {
+ PostShuffleMask, MergableHorizOp(HorizOpcode))) {
SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
if (!PostShuffleMask.empty())
HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
@@ -51789,7 +51796,7 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
auto HorizOpcode = IsAdd ? X86ISD::HADD : X86ISD::HSUB;
if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
- PostShuffleMask)) {
+ PostShuffleMask, MergableHorizOp(HorizOpcode))) {
auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops);
diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll
index 58afc4732e31b..6aa53278d81ef 100644
--- a/llvm/test/CodeGen/X86/haddsub-undef.ll
+++ b/llvm/test/CodeGen/X86/haddsub-undef.ll
@@ -1052,31 +1052,15 @@ define <4 x float> @PR34724_add_v4f32_0u23(<4 x float> %0, <4 x float> %1) {
}
define <4 x float> @PR34724_add_v4f32_01u3(<4 x float> %0, <4 x float> %1) {
-; SSE-SLOW-LABEL: PR34724_add_v4f32_01u3:
-; SSE-SLOW: # %bb.0:
-; SSE-SLOW-NEXT: haddps %xmm0, %xmm0
-; SSE-SLOW-NEXT: movsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; SSE-SLOW-NEXT: addps %xmm1, %xmm2
-; SSE-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
-; SSE-SLOW-NEXT: retq
-;
-; SSE-FAST-LABEL: PR34724_add_v4f32_01u3:
-; SSE-FAST: # %bb.0:
-; SSE-FAST-NEXT: haddps %xmm1, %xmm0
-; SSE-FAST-NEXT: retq
-;
-; AVX-SLOW-LABEL: PR34724_add_v4f32_01u3:
-; AVX-SLOW: # %bb.0:
-; AVX-SLOW-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX-SLOW-NEXT: vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; AVX-SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; AVX-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX-SLOW-NEXT: retq
+; SSE-LABEL: PR34724_add_v4f32_01u3:
+; SSE: # %bb.0:
+; SSE-NEXT: haddps %xmm1, %xmm0
+; SSE-NEXT: retq
;
-; AVX-FAST-LABEL: PR34724_add_v4f32_01u3:
-; AVX-FAST: # %bb.0:
-; AVX-FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0
-; AVX-FAST-NEXT: retq
+; AVX-LABEL: PR34724_add_v4f32_01u3:
+; AVX: # %bb.0:
+; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%3 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 0, i32 2>
%4 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 1, i32 3>
%5 = fadd <2 x float> %3, %4
@@ -1088,31 +1072,15 @@ define <4 x float> @PR34724_add_v4f32_01u3(<4 x float> %0, <4 x float> %1) {
}
define <4 x float> @PR34724_add_v4f32_012u(<4 x float> %0, <4 x float> %1) {
-; SSE-SLOW-LABEL: PR34724_add_v4f32_012u:
-; SSE-SLOW: # %bb.0:
-; SSE-SLOW-NEXT: haddps %xmm0, %xmm0
-; SSE-SLOW-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; SSE-SLOW-NEXT: addps %xmm1, %xmm2
-; SSE-SLOW-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE-SLOW-NEXT: retq
-;
-; SSE-FAST-LABEL: PR34724_add_v4f32_012u:
-; SSE-FAST: # %bb.0:
-; SSE-FAST-NEXT: haddps %xmm1, %xmm0
-; SSE-FAST-NEXT: retq
-;
-; AVX-SLOW-LABEL: PR34724_add_v4f32_012u:
-; AVX-SLOW: # %bb.0:
-; AVX-SLOW-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX-SLOW-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; AVX-SLOW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX-SLOW-NEXT: retq
+; SSE-LABEL: PR34724_add_v4f32_012u:
+; SSE: # %bb.0:
+; SSE-NEXT: haddps %xmm1, %xmm0
+; SSE-NEXT: retq
;
-; AVX-FAST-LABEL: PR34724_add_v4f32_012u:
-; AVX-FAST: # %bb.0:
-; AVX-FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0
-; AVX-FAST-NEXT: retq
+; AVX-LABEL: PR34724_add_v4f32_012u:
+; AVX: # %bb.0:
+; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%3 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 0, i32 2>
%4 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 1, i32 3>
%5 = fadd <2 x float> %3, %4
More information about the llvm-commits mailing list