[llvm] 4d7201e - DAG: Stop trying to fold FP -(x-y) -> y-x in getNode with nsz
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 31 20:15:27 PST 2019
Author: Matt Arsenault
Date: 2019-12-31T22:49:51-05:00
New Revision: 4d7201e7b988b62a6ca30416fd03847b5a39dae0
URL: https://github.com/llvm/llvm-project/commit/4d7201e7b988b62a6ca30416fd03847b5a39dae0
DIFF: https://github.com/llvm/llvm-project/commit/4d7201e7b988b62a6ca30416fd03847b5a39dae0.diff
LOG: DAG: Stop trying to fold FP -(x-y) -> y-x in getNode with nsz
This was increasing the number of instructions when fsub was legalized
on AMDGPU with no signed zeros enabled. This fold should be guarded by
hasOneUse, and I don't think getNode should be doing that. The same
fold is already done as a regular combine through isNegatibleForFree.
This does require duplicating the fold in DAGCombiner — even though
isNegatibleForFree does this combine already (and properly checks
hasOneUse) — to avoid one PPC regression. In the regression, the outer
fneg has nsz but the fsub
operand does not. isNegatibleForFree only sees the operand, and
doesn't see it's used from an nsz context. An nsz parameter needs to be
added and threaded through isNegatibleForFree to avoid this.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/AArch64/arm64-fp.ll
llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 02ae11d8a002..1415b1e37d15 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13350,6 +13350,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ // FIXME: This is duplicated in isNegatibleForFree, but isNegatibleForFree
+ // doesn't know it was called from a context with a nsz flag if the input
+ // fsub does not.
+ if (N0.getOpcode() == ISD::FSUB &&
+ (DAG.getTarget().Options.NoSignedZerosFPMath ||
+ N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
+ return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
+ N0.getOperand(0), N->getFlags());
+ }
+
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
if (!TLI.isFNegFree(VT) &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a808b399596f..6dcf271a81c7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4688,11 +4688,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
- // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
- OpOpcode == ISD::FSUB)
- return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
- Operand.getOperand(0), Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getOperand(0);
break;
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp.ll b/llvm/test/CodeGen/AArch64/arm64-fp.ll
index cf8b90de130b..85c5f25fa4bc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp.ll
@@ -38,9 +38,8 @@ define double @negation_propagation(double* %arg, double %arg1, double %arg2) {
define { double, double } @testfn(double %x, double %y) #0 {
; CHECK-LABEL: testfn:
; CHECK: // %bb.0:
-; CHECK-NEXT: fsub d2, d0, d1
-; CHECK-NEXT: fsub d1, d1, d0
-; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: fsub d0, d0, d1
+; CHECK-NEXT: fneg d1, d0
; CHECK-NEXT: ret
%sub = fsub fast double %x, %y
%neg = fneg fast double %sub
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll b/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
index 3e3284ed0260..76300eb13167 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-no-signed-zeros-fp-math=true < %s | FileCheck %s
-
-; FIXME: This should be the same when the extra instructions are fixed
-; XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-no-signed-zeros-fp-math=false < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-no-signed-zeros-fp-math=false < %s | FileCheck %s
; no-signed-zeros-fp-math should not increase the number of
; instructions emitted.
@@ -11,10 +9,9 @@ define { double, double } @testfn(double %arg, double %arg1, double %arg2) {
; CHECK-LABEL: testfn:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_add_f64 v[6:7], v[4:5], -v[0:1]
-; CHECK-NEXT: v_add_f64 v[4:5], v[0:1], -v[4:5]
-; CHECK-NEXT: v_add_f64 v[0:1], v[6:7], -v[2:3]
-; CHECK-NEXT: v_add_f64 v[2:3], -v[2:3], v[4:5]
+; CHECK-NEXT: v_add_f64 v[4:5], v[4:5], -v[0:1]
+; CHECK-NEXT: v_add_f64 v[0:1], v[4:5], -v[2:3]
+; CHECK-NEXT: v_add_f64 v[2:3], -v[2:3], -v[4:5]
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
%tmp = fsub fast double 0.000000e+00, %arg1
More information about the llvm-commits
mailing list