[PATCH] transform fadd chains to increase parallelism

Sanjay Patel spatel at rotateright.com
Tue Apr 28 13:34:34 PDT 2015


Patch updated:

1. Fixed fold comment to match code
2. Moved variable declaration closer to use
3. Made test CHECK lines match expected output exactly


http://reviews.llvm.org/D9232

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/X86/fp-fast.ll

Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7801,6 +7801,24 @@
                            N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
       }
     }
+
+    // Canonicalize chains of adds to LHS to simplify the following transform.
+    if (N0.getOpcode() != ISD::FADD && N1.getOpcode() == ISD::FADD)
+      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
+    
+    // Convert a chain of 3 dependent operations into 2 independent operations
+    // and 1 dependent operation:
+    //  (fadd N0: (fadd N00: (fadd z, w), N01: y), N1: x) ->
+    //  (fadd N00: (fadd z, w), (fadd N1: x, N01: y))
+    if (N0.getOpcode() == ISD::FADD &&  N0.hasOneUse() &&
+        N1.getOpcode() != ISD::FADD) {
+      SDValue N00 = N0.getOperand(0);
+      if (N00.getOpcode() == ISD::FADD) {
+        SDValue N01 = N0.getOperand(1);
+        SDValue NewAdd = DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N01);
+        return DAG.getNode(ISD::FADD, SDLoc(N), VT, N00, NewAdd);
+      }
+    }
   } // enable-unsafe-fp-math
 
   // FADD -> FMA combines:
Index: test/CodeGen/X86/fp-fast.ll
===================================================================
--- test/CodeGen/X86/fp-fast.ll
+++ test/CodeGen/X86/fp-fast.ll
@@ -113,3 +113,46 @@
   %t2 = fadd float %a, %t1
   ret float %t2
 }
+
+; Verify that the first two adds are independent of each other and that their
+; destination registers are the source registers of the third add.
+
+define float @reassociate_adds1(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm2, %xmm3, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %add0 = fadd float %a, %b       ; innermost add: a + b
+  %add1 = fadd float %add0, %c    ; chain continues as the LHS operand
+  %add2 = fadd float %add1, %d    ; chain is the LHS again: ((a + b) + c) + d
+  ret float %add2
+}
+
+define float @reassociate_adds2(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm2, %xmm3, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %add0 = fadd float %a, %b       ; innermost add: a + b
+  %add1 = fadd float %c, %add0    ; commuted: the inner add is the RHS operand here
+  %add2 = fadd float %add1, %d    ; outer add keeps the chain as the LHS operand
+  ret float %add2
+}
+
+define float @reassociate_adds3(float %a, float %b, float %c, float %d) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm2, %xmm3, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %add0 = fadd float %a, %b       ; innermost add: a + b
+  %add1 = fadd float %add0, %c    ; middle add keeps the chain as the LHS operand
+  %add2 = fadd float %d, %add1    ; commuted: the chain is the RHS operand here
+  ret float %add2
+}
+

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D9232.24574.patch
Type: text/x-patch
Size: 2806 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150428/034fb21a/attachment.bin>


More information about the llvm-commits mailing list