[llvm] r334608 - [DAGCombiner] remove hasOneUse() check from fadd constants transform

Wed Jun 13 08:22:48 PDT 2018

Author: spatel
Date: Wed Jun 13 08:22:48 2018
New Revision: 334608

URL: http://llvm.org/viewvc/llvm-project?rev=334608&view=rev
Log:
[DAGCombiner] remove hasOneUse() check from fadd constants transform

We're constant folding here, so we shouldn't check uses. This matches
the IR optimizer behavior.

The x86 test shows the expected win. The AArch64 test shows something
else. This only seems to happen if the "generic" AArch64 CPU model is 
used by MachineCombiner, so I'll file a bug report to follow-up.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll
    llvm/trunk/test/CodeGen/X86/fadd-combines.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=334608&r1=334607&r2=334608&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Jun 13 08:22:48 2018
@@ -10351,13 +10351,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N
     // Selection pass has a hard time dealing with FP constants.
     bool AllowNewConst = (Level < AfterLegalizeDAG);
 
-    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
-    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
-        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
-      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
-                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
-                                     Flags),
-                         Flags);
+    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
+    if (N1CFP && N0.getOpcode() == ISD::FADD &&
+        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
+      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
+    }
 
     // If allowed, fold (fadd (fneg x), x) -> 0.0
     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)

Modified: llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll?rev=334608&r1=334607&r2=334608&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll Wed Jun 13 08:22:48 2018
@@ -112,14 +112,19 @@ define float @fadd_const_multiuse_fmf(fl
   ret float %a3
 }
 
+; DAGCombiner transforms this into: (x + 59.0) + (x + 17.0).
+; The machine combiner transforms this into a chain of 3 dependent adds:
+; ((x + 59.0) + 17.0) + x
+
 define float @fadd_const_multiuse_attr(float %x) #0 {
 ; CHECK-LABEL: fadd_const_multiuse_attr:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x9, .LCPI8_1
 ; CHECK-NEXT:    adrp x8, .LCPI8_0
-; CHECK-NEXT:    ldr s1, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT:    fadd s0, s0, s1
-; CHECK-NEXT:    fmov s1, #17.00000000
+; CHECK-NEXT:    ldr s1, [x9, :lo12:.LCPI8_1]
+; CHECK-NEXT:    ldr s2, [x8, :lo12:.LCPI8_0]
 ; CHECK-NEXT:    fadd s1, s0, s1
+; CHECK-NEXT:    fadd s1, s2, s1
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
   %a1 = fadd float %x, 42.0

Modified: llvm/trunk/test/CodeGen/X86/fadd-combines.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fadd-combines.ll?rev=334608&r1=334607&r2=334608&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fadd-combines.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fadd-combines.ll Wed Jun 13 08:22:48 2018
@@ -221,17 +221,16 @@ define <4 x float> @fadd_fadd_x_x_fadd_x
   ret <4 x float> %z
 }
 
-; FIXME:
 ; ((x + 42.0) + 17.0) + (x + 42.0) --> (x + 59.0) + (x + 17.0)
-; It's still 3 adds, but the first two are independent.
+; It's still 3 adds, but the first 2 are independent.
 ; More reassocation could get this to 2 adds or 1 FMA (that's done in IR, but not in the DAG).
 
 define float @fadd_const_multiuse_attr(float %x) #0 {
 ; CHECK-LABEL: fadd_const_multiuse_attr:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    addss %xmm0, %xmm1
+; CHECK-NEXT:    addss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    addss %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %a1 = fadd float %x, 42.0