[llvm] 3906ae3 - [DAGCombine] Check the uses of negated floating constant and remove the hack

Wed Mar 4 19:47:23 PST 2020

Author: QingShan Zhang
Date: 2020-03-05T03:42:50Z
New Revision: 3906ae387f0775dfe4426e4336748269fafbd190

URL: https://github.com/llvm/llvm-project/commit/3906ae387f0775dfe4426e4336748269fafbd190
DIFF: https://github.com/llvm/llvm-project/commit/3906ae387f0775dfe4426e4336748269fafbd190.diff

LOG: [DAGCombine] Check the uses of negated floating constant and remove the hack

PowerPC hits an assertion for much the same reason as https://reviews.llvm.org/D70975.
Although a hack is already in place, it still fails in some cases: when operand 0 is NOT
a constant FP but another fma that has a constant FP operand, and that constant FP is negated, which results in multiple uses.

A better fix is to check the uses of the negated constant FP. If its negated value already
has a use, negating the constant is beneficial because no extra node is added.

Differential revision: https://reviews.llvm.org/D75501

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/PowerPC/fma-combine.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index eafa0974a735..27a7cbe8a191 100644

--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5552,9 +5552,20 @@ TargetLowering::getNegatibleCost(SDValue Op, SelectionDAG &DAG,
   EVT VT = Op.getValueType();
   const SDNodeFlags Flags = Op->getFlags();
   const TargetOptions &Options = DAG.getTarget().Options;
-  if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
-                           isFPExtFree(VT, Op.getOperand(0).getValueType())))
-    return NegatibleCost::Expensive;
+  if (!Op.hasOneUse()) {
+    bool IsFreeExtend = Op.getOpcode() == ISD::FP_EXTEND &&
+                        isFPExtFree(VT, Op.getOperand(0).getValueType());
+
+    // If the negated floating-point constant already has a use, negating it is
+    // free even if the constant itself has multiple uses.
+    bool IsFreeConstant =
+        Op.getOpcode() == ISD::ConstantFP &&
+        !getNegatedExpression(Op, DAG, LegalOperations, ForCodeSize)
+             .use_empty();
+
+    if (!IsFreeExtend && !IsFreeConstant)
+      return NegatibleCost::Expensive;
+  }
 
   // Don't recurse exponentially.
   if (Depth > SelectionDAG::MaxRecursionDepth)
@@ -5760,14 +5771,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                         ForCodeSize, Depth + 1);
     NegatibleCost V1 = getNegatibleCost(Op.getOperand(1), DAG, LegalOperations,
                                         ForCodeSize, Depth + 1);
-    // TODO: This is a hack. It is possible that costs have changed between now
-    //       and the initial calls to getNegatibleCost(). That is because we
-    //       are rewriting the expression, and that may change the number of
-    //       uses (and therefore the cost) of values. If the negation costs are
-    //       equal, only negate this value if it is a constant. Otherwise, try
-    //       operand 1. A better fix would eliminate uses as a cost factor or
-    //       track the change in uses as we rewrite the expression.
-    if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
+    if (V0 > V1) {
       // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
       SDValue Neg0 = getNegatedExpression(
           Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);

diff  --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll
index 88da295201fe..8973dc1334d7 100644
--- a/llvm/test/CodeGen/PowerPC/fma-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll
@@ -137,3 +137,104 @@ entry:
   %add = fsub double %mul, %a
   ret double %add
 }
+
+define float @fma_combine_no_ice() {
+; CHECK-FAST-LABEL: fma_combine_no_ice:
+; CHECK-FAST:       # %bb.0:
+; CHECK-FAST-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-FAST-NEXT:    addis 4, 2, .LCPI4_1@toc@ha
+; CHECK-FAST-NEXT:    lfs 0, .LCPI4_0@toc@l(3)
+; CHECK-FAST-NEXT:    lfsx 2, 0, 3
+; CHECK-FAST-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
+; CHECK-FAST-NEXT:    lfs 3, .LCPI4_1@toc@l(4)
+; CHECK-FAST-NEXT:    lfs 1, .LCPI4_2@toc@l(3)
+; CHECK-FAST-NEXT:    xsmaddasp 3, 2, 0
+; CHECK-FAST-NEXT:    xsmaddasp 1, 2, 3
+; CHECK-FAST-NEXT:    xsnmsubasp 1, 3, 2
+; CHECK-FAST-NEXT:    blr
+;
+; CHECK-FAST-NOVSX-LABEL: fma_combine_no_ice:
+; CHECK-FAST-NOVSX:       # %bb.0:
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    lfs 0, .LCPI4_0@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    lfs 1, 0(3)
+; CHECK-FAST-NOVSX-NEXT:    lfs 2, .LCPI4_1@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    fmadds 0, 1, 2, 0
+; CHECK-FAST-NOVSX-NEXT:    lfs 2, .LCPI4_2@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    fmadds 2, 1, 0, 2
+; CHECK-FAST-NOVSX-NEXT:    fnmsubs 1, 0, 1, 2
+; CHECK-FAST-NOVSX-NEXT:    blr
+;
+; CHECK-LABEL: fma_combine_no_ice:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-NEXT:    addis 4, 2, .LCPI4_1@toc@ha
+; CHECK-NEXT:    lfs 0, .LCPI4_0@toc@l(3)
+; CHECK-NEXT:    lfsx 2, 0, 3
+; CHECK-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
+; CHECK-NEXT:    lfs 3, .LCPI4_1@toc@l(4)
+; CHECK-NEXT:    lfs 1, .LCPI4_2@toc@l(3)
+; CHECK-NEXT:    xsmaddasp 3, 2, 0
+; CHECK-NEXT:    xsmaddasp 1, 2, 3
+; CHECK-NEXT:    xsnmsubasp 1, 3, 2
+; CHECK-NEXT:    blr
+  %tmp = load float, float* undef, align 4
+  %tmp2 = load float, float* undef, align 4
+  %tmp3 = fmul fast float %tmp, 0x3FE372D780000000
+  %tmp4 = fadd fast float %tmp3, 1.000000e+00
+  %tmp5 = fmul fast float %tmp2, %tmp4
+  %tmp6 = load float, float* undef, align 4
+  %tmp7 = load float, float* undef, align 4
+  %tmp8 = fmul fast float %tmp7, 0x3FE372D780000000
+  %tmp9 = fsub fast float -1.000000e+00, %tmp8
+  %tmp10 = fmul fast float %tmp9, %tmp6
+  %tmp11 = fadd fast float %tmp5, 5.000000e-01
+  %tmp12 = fadd fast float %tmp11, %tmp10
+  ret float %tmp12
+}
+
+; This would crash while trying getNegatedExpression().
+define double @getNegatedExpression_crash(double %x, double %y) {
+; CHECK-FAST-LABEL: getNegatedExpression_crash:
+; CHECK-FAST:       # %bb.0:
+; CHECK-FAST-NEXT:    addis 3, 2, .LCPI5_1@toc@ha
+; CHECK-FAST-NEXT:    addis 4, 2, .LCPI5_0@toc@ha
+; CHECK-FAST-NEXT:    lfs 3, .LCPI5_1@toc@l(3)
+; CHECK-FAST-NEXT:    lfs 4, .LCPI5_0@toc@l(4)
+; CHECK-FAST-NEXT:    xssubdp 0, 1, 3
+; CHECK-FAST-NEXT:    xsmaddadp 3, 1, 4
+; CHECK-FAST-NEXT:    xsmaddadp 0, 3, 2
+; CHECK-FAST-NEXT:    fmr 1, 0
+; CHECK-FAST-NEXT:    blr
+;
+; CHECK-FAST-NOVSX-LABEL: getNegatedExpression_crash:
+; CHECK-FAST-NOVSX:       # %bb.0:
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    addis 4, 2, .LCPI5_1@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    lfs 0, .LCPI5_0@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    lfs 3, .LCPI5_1@toc@l(4)
+; CHECK-FAST-NOVSX-NEXT:    fmadd 3, 1, 3, 0
+; CHECK-FAST-NOVSX-NEXT:    fsub 0, 1, 0
+; CHECK-FAST-NOVSX-NEXT:    fmadd 1, 3, 2, 0
+; CHECK-FAST-NOVSX-NEXT:    blr
+;
+; CHECK-LABEL: getNegatedExpression_crash:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis 3, 2, .LCPI5_1@toc@ha
+; CHECK-NEXT:    addis 4, 2, .LCPI5_0@toc@ha
+; CHECK-NEXT:    lfs 3, .LCPI5_1@toc@l(3)
+; CHECK-NEXT:    lfs 4, .LCPI5_0@toc@l(4)
+; CHECK-NEXT:    xssubdp 0, 1, 3
+; CHECK-NEXT:    xsmaddadp 3, 1, 4
+; CHECK-NEXT:    xsmaddadp 0, 3, 2
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    blr
+  %neg = fneg fast double %x
+  %fma = call fast double @llvm.fma.f64(double %neg, double 42.0, double -1.0)
+  %add = fadd fast double %x, 1.0
+  %fma1 = call fast double @llvm.fma.f64(double %fma, double %y, double %add)
+  ret double %fma1
+}
+declare double @llvm.fma.f64(double, double, double) nounwind readnone