[PATCH] D9893: fix an invisible bug when combining repeated FP divisors
Sanjay Patel
spatel at rotateright.com
Thu Jul 9 10:28:56 PDT 2015
This revision was automatically updated to reflect the committed changes.
Closed by commit rL241826: fix an invisible bug when combining repeated FP divisors (authored by spatel).
Changed prior to commit:
http://reviews.llvm.org/D9893?vs=26173&id=29353#toc
Repository:
rL LLVM
http://reviews.llvm.org/D9893
Files:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/fdiv-combine.ll
Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8374,18 +8374,25 @@
if (TLI.combineRepeatedFPDivisors(Users.size())) {
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ // FIXME: This optimization requires some level of fast-math, so the
+ // created reciprocal node should at least have the 'allowReciprocal'
+ // fast-math-flag set.
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
// Dividend / Divisor -> Dividend * Reciprocal
for (auto *U : Users) {
SDValue Dividend = U->getOperand(0);
if (Dividend != FPOne) {
SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
Reciprocal);
- DAG.ReplaceAllUsesWith(U, NewNode.getNode());
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
}
}
- return SDValue();
+ return SDValue(N, 0); // N was replaced.
}
}
Index: llvm/trunk/test/CodeGen/X86/fdiv-combine.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fdiv-combine.ll
+++ llvm/trunk/test/CodeGen/X86/fdiv-combine.ll
@@ -27,5 +27,22 @@
ret float %div2
}
+; If the reciprocal is already calculated, we should not
+; generate an extra multiplication by 1.0.
+
+define double @div3_arcp(double %x, double %y, double %z) #0 {
+; CHECK-LABEL: div3_arcp:
+; CHECK: # BB#0:
+; CHECK-NEXT: movsd{{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: divsd %xmm1, %xmm2
+; CHECK-NEXT: mulsd %xmm2, %xmm0
+; CHECK-NEXT: addsd %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %div1 = fdiv fast double 1.0, %y
+ %div2 = fdiv fast double %x, %y
+ %ret = fadd fast double %div2, %div1
+ ret double %ret
+}
+
; FIXME: If the backend understands 'arcp', then this attribute is unnecessary.
attributes #0 = { "unsafe-fp-math"="true" }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D9893.29353.patch
Type: text/x-patch
Size: 2295 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150709/d4a46269/attachment.bin>
More information about the llvm-commits
mailing list