[llvm] d577193 - [DAGCombine] Respect the uses when combine FMA for a*b+/-c*d
QingShan Zhang via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 17 20:36:26 PDT 2020
Author: QingShan Zhang
Date: 2020-03-18T03:34:27Z
New Revision: d577193c0f74104d322e4268279750bb6efccf8e
URL: https://github.com/llvm/llvm-project/commit/d577193c0f74104d322e4268279750bb6efccf8e
DIFF: https://github.com/llvm/llvm-project/commit/d577193c0f74104d322e4268279750bb6efccf8e.diff
LOG: [DAGCombine] Respect the uses when combine FMA for a*b+/-c*d
For a*b-c*d there are two ways to form an FMA: fma(a, b, -(c*d)) or fma(-c, d, a*b).
This patch picks between them based on the number of uses of a*b and c*d: when both multiplies are contractable, the one with fewer uses is folded into the FMA.
Differential Revision: https://reviews.llvm.org/D75982
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/PowerPC/fma-precision.ll
llvm/test/CodeGen/PowerPC/recipest.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2e0b8de9bcd6..254669b36470 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11983,13 +11983,24 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return SDValue();
};
- // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (SDValue V = tryToFoldXYSubZ(N0, N1))
- return V;
-
- // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
- if (SDValue V = tryToFoldXSubYZ(N0, N1))
- return V;
+ // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
+ (N0.getNode()->use_size() > N1.getNode()->use_size())) {
+ // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
+ if (SDValue V = tryToFoldXSubYZ(N0, N1))
+ return V;
+ // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
+ if (SDValue V = tryToFoldXYSubZ(N0, N1))
+ return V;
+ } else {
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (SDValue V = tryToFoldXYSubZ(N0, N1))
+ return V;
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ if (SDValue V = tryToFoldXSubYZ(N0, N1))
+ return V;
+ }
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
diff --git a/llvm/test/CodeGen/PowerPC/fma-precision.ll b/llvm/test/CodeGen/PowerPC/fma-precision.ll
index 7f832d8bf4ff..89b5e097d8a6 100644
--- a/llvm/test/CodeGen/PowerPC/fma-precision.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-precision.ll
@@ -5,10 +5,10 @@
define double @fsub1(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: fsub1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xsmuldp 3, 4, 3
; CHECK-NEXT: xsmuldp 0, 2, 1
-; CHECK-NEXT: xsmsubadp 3, 2, 1
-; CHECK-NEXT: xsmuldp 1, 0, 3
+; CHECK-NEXT: fmr 1, 0
+; CHECK-NEXT: xsnmsubadp 1, 4, 3
+; CHECK-NEXT: xsmuldp 1, 0, 1
; CHECK-NEXT: blr
entry:
%mul = fmul fast double %b, %a
@@ -101,13 +101,12 @@ entry:
define double @fma_multi_uses1(double %a, double %b, double %c, double %d, double* %p1, double* %p2, double* %p3) {
; CHECK-LABEL: fma_multi_uses1:
; CHECK: # %bb.0:
-; CHECK-NEXT: xsmuldp 5, 1, 2
+; CHECK-NEXT: xsmuldp 1, 1, 2
; CHECK-NEXT: xsmuldp 0, 3, 4
-; CHECK-NEXT: stfd 5, 0(7)
-; CHECK-NEXT: stfd 5, 0(8)
+; CHECK-NEXT: stfd 1, 0(7)
+; CHECK-NEXT: stfd 1, 0(8)
+; CHECK-NEXT: xsnmsubadp 1, 3, 4
; CHECK-NEXT: stfd 0, 0(9)
-; CHECK-NEXT: xsmsubadp 0, 1, 2
-; CHECK-NEXT: fmr 1, 0
; CHECK-NEXT: blr
%ab = fmul fast double %a, %b
%cd = fmul fast double %c, %d
@@ -142,16 +141,14 @@ define double @fma_multi_uses3(double %a, double %b, double %c, double %d, doubl
; CHECK-LABEL: fma_multi_uses3:
; CHECK: # %bb.0:
; CHECK-NEXT: xsmuldp 0, 1, 2
-; CHECK-NEXT: xsmuldp 3, 3, 4
+; CHECK-NEXT: xsmuldp 1, 5, 6
; CHECK-NEXT: ld 3, 96(1)
; CHECK-NEXT: stfd 0, 0(9)
; CHECK-NEXT: stfd 0, 0(10)
-; CHECK-NEXT: fmr 0, 3
-; CHECK-NEXT: xsmsubadp 3, 1, 2
-; CHECK-NEXT: xsmsubadp 0, 5, 6
-; CHECK-NEXT: xsmuldp 4, 5, 6
-; CHECK-NEXT: stfd 4, 0(3)
-; CHECK-NEXT: xsadddp 1, 3, 0
+; CHECK-NEXT: stfd 1, 0(3)
+; CHECK-NEXT: xsnmsubadp 1, 3, 4
+; CHECK-NEXT: xsnmsubadp 0, 3, 4
+; CHECK-NEXT: xsadddp 1, 0, 1
; CHECK-NEXT: blr
%ab = fmul fast double %a, %b
%cd = fmul fast double %c, %d
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index 7323d1e775b1..7fb9b07152ef 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -177,12 +177,11 @@ define float @rsqrt_fmul_fmf(float %a, float %b, float %c) {
; CHECK-NEXT: fmuls 1, 1, 0
; CHECK-NEXT: fmadds 1, 1, 0, 4
; CHECK-NEXT: fmuls 0, 0, 5
-; CHECK-NEXT: fres 5, 2
+; CHECK-NEXT: fmuls 0, 0, 1
+; CHECK-NEXT: fres 1, 2
; CHECK-NEXT: fmuls 4, 0, 1
-; CHECK-NEXT: fmuls 4, 4, 5
-; CHECK-NEXT: fmuls 2, 2, 4
-; CHECK-NEXT: fmsubs 0, 0, 1, 2
-; CHECK-NEXT: fmadds 0, 5, 0, 4
+; CHECK-NEXT: fnmsubs 0, 2, 4, 0
+; CHECK-NEXT: fmadds 0, 1, 0, 4
; CHECK-NEXT: fmuls 1, 3, 0
; CHECK-NEXT: blr
%x = call fast float @llvm.sqrt.f32(float %a)
More information about the llvm-commits
mailing list