[PATCH] D9893: fix an invisible bug when combining repeated FP divisors

Sanjay Patel spatel at rotateright.com
Thu Jul 9 10:28:56 PDT 2015


This revision was automatically updated to reflect the committed changes.
Closed by commit rL241826: fix an invisible bug when combining repeated FP divisors (authored by spatel).

Changed prior to commit:
  http://reviews.llvm.org/D9893?vs=26173&id=29353#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D9893

Files:
  llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/trunk/test/CodeGen/X86/fdiv-combine.ll

Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8374,18 +8374,25 @@
 
     if (TLI.combineRepeatedFPDivisors(Users.size())) {
       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+      // FIXME: This optimization requires some level of fast-math, so the
+      // created reciprocal node should at least have the 'allowReciprocal'
+      // fast-math-flag set.
       SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
 
       // Dividend / Divisor -> Dividend * Reciprocal
       for (auto *U : Users) {
         SDValue Dividend = U->getOperand(0);
         if (Dividend != FPOne) {
           SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                         Reciprocal);
-          DAG.ReplaceAllUsesWith(U, NewNode.getNode());
+          CombineTo(U, NewNode);
+        } else if (U != Reciprocal.getNode()) {
+          // In the absence of fast-math-flags, this user node is always the
+          // same node as Reciprocal, but with fast-math-flags they may differ.
+          CombineTo(U, Reciprocal);
         }
       }
-      return SDValue();
+      return SDValue(N, 0);  // N was replaced.
     }
   }
 
Index: llvm/trunk/test/CodeGen/X86/fdiv-combine.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fdiv-combine.ll
+++ llvm/trunk/test/CodeGen/X86/fdiv-combine.ll
@@ -27,5 +27,22 @@
   ret float %div2
 }
 
+; If the reciprocal is already calculated, we should not
+; generate an extra multiplication by 1.0.
+
+define double @div3_arcp(double %x, double %y, double %z) #0 {
+; CHECK-LABEL: div3_arcp:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movsd{{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    divsd %xmm1, %xmm2
+; CHECK-NEXT:    mulsd %xmm2, %xmm0
+; CHECK-NEXT:    addsd %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %div1 = fdiv fast double 1.0, %y
+  %div2 = fdiv fast double %x, %y
+  %ret = fadd fast double %div2, %div1
+  ret double %ret
+}
+
 ; FIXME: If the backend understands 'arcp', then this attribute is unnecessary.
 attributes #0 = { "unsafe-fp-math"="true" }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D9893.29353.patch
Type: text/x-patch
Size: 2295 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150709/d4a46269/attachment.bin>


More information about the llvm-commits mailing list