[llvm] r370071 - [DAGCombiner] cancel fnegs from multiplied operands of FMA

Tue Aug 27 08:17:46 PDT 2019

Author: spatel
Date: Tue Aug 27 08:17:46 2019
New Revision: 370071

URL: http://llvm.org/viewvc/llvm-project?rev=370071&view=rev
Log:
[DAGCombiner] cancel fnegs from multiplied operands of FMA

(-X) * (-Y) + Z --> X * Y + Z

This is a missing optimization that shows up as a potential regression in D66050,
so we should solve it first. We appear to be partly missing this fold in IR as well.

We do handle the simpler case already:
(-X) * (-Y) --> X * Y

It might also be beneficial to make the constraint less conservative (e.g., if both
operands are cheap in negated form, but not necessarily cheaper), but that causes
infinite looping for the existing fmul transform.

Differential Revision: https://reviews.llvm.org/D66755

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll
    llvm/trunk/test/CodeGen/PowerPC/fneg.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=370071&r1=370070&r2=370071&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Aug 27 08:17:46 2019
@@ -516,6 +516,7 @@ namespace {
     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                            SDValue &CC) const;
     bool isOneUseSetCC(SDValue N) const;
+    bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
 
     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                          unsigned HiOp);
@@ -12110,6 +12111,22 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
   return SDValue();
 }
 
+/// Return true if both inputs are at least as cheap in negated form and at
+/// least one input is strictly cheaper in negated form.
+bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
+  const TargetOptions &Options = DAG.getTarget().Options;
+  if (char LHSNeg = isNegatibleForFree(X, LegalOperations, TLI, &Options,
+                                   ForCodeSize))
+    if (char RHSNeg = isNegatibleForFree(Y, LegalOperations, TLI, &Options,
+                                         ForCodeSize))
+      // Both negated operands are at least as cheap as their counterparts.
+      // Check to see if at least one is cheaper negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return true;
+
+  return false;
+}
+
 SDValue DAGCombiner::visitFMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -12180,21 +12197,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N
     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
       return DAG.getNode(ISD::FNEG, DL, VT, N0);
 
-  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
-  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
-                                       ForCodeSize)) {
-    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
-                                         ForCodeSize)) {
-      // Both can be negated for free, check to see if at least one is cheaper
-      // negated.
-      if (LHSNeg == 2 || RHSNeg == 2)
-        return DAG.getNode(ISD::FMUL, DL, VT,
-                           GetNegatedExpression(N0, DAG, LegalOperations,
-                                                ForCodeSize),
-                           GetNegatedExpression(N1, DAG, LegalOperations,
-                                                ForCodeSize),
-                           Flags);
-    }
+  // -N0 * -N1 --> N0 * N1
+  if (isCheaperToUseNegatedFPOps(N0, N1)) {
+    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
   }
 
   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
@@ -12273,6 +12280,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N)
     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
   }
 
+  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
+  if (isCheaperToUseNegatedFPOps(N0, N1)) {
+    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+  }
+
   if (UnsafeFPMath) {
     if (N0CFP && N0CFP->isZero())
       return N2;

Modified: llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll?rev=370071&r1=370070&r2=370071&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fneg-combines.ll Tue Aug 27 08:17:46 2019
@@ -1205,7 +1205,7 @@ define amdgpu_kernel void @v_fneg_fma_x_
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
 
-; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
+; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
 ; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]]
 
 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]

Modified: llvm/trunk/test/CodeGen/PowerPC/fneg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fneg.ll?rev=370071&r1=370070&r2=370071&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fneg.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fneg.ll Tue Aug 27 08:17:46 2019
@@ -20,8 +20,7 @@ declare float @llvm.fmuladd.f32(float, f
 define float @fma_fneg_fneg(float %x, float %y, float %z) {
 ; CHECK-LABEL: fma_fneg_fneg:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fneg f0, f2
-; CHECK-NEXT:    fnmsubs f1, f1, f0, f3
+; CHECK-NEXT:    fmadds f1, f1, f2, f3
 ; CHECK-NEXT:    blr
   %negx = fneg float %x
   %negy = fneg float %y
@@ -32,8 +31,8 @@ define float @fma_fneg_fneg(float %x, fl
 define float @fma_fneg_fsub(float %x, float %y0, float %y1, float %z) {
 ; CHECK-LABEL: fma_fneg_fsub:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fsubs f0, f2, f3
-; CHECK-NEXT:    fnmsubs f1, f1, f0, f4
+; CHECK-NEXT:    fsubs f0, f3, f2
+; CHECK-NEXT:    fmadds f1, f1, f0, f4
 ; CHECK-NEXT:    blr
   %negx = fneg float %x
   %negy = fsub nsz float %y0, %y1