[llvm] r222710 - [PowerPC] Implement combineRepeatedFPDivisors
Hal Finkel
hfinkel at anl.gov
Mon Nov 24 15:45:22 PST 2014
Author: hfinkel
Date: Mon Nov 24 17:45:21 2014
New Revision: 222710
URL: http://llvm.org/viewvc/llvm-project?rev=222710&view=rev
Log:
[PowerPC] Implement combineRepeatedFPDivisors
This does not matter on newer cores (where we can use reciprocal estimates in
fast-math mode anyway), but for older cores this allows us to generate better
fast-math code where we have multiple FDIVs with a common divisor.
Added:
llvm/trunk/test/CodeGen/PowerPC/fdiv-combine.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=222710&r1=222709&r2=222710&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Mon Nov 24 17:45:21 2014
@@ -7526,6 +7526,28 @@ SDValue PPCTargetLowering::getRecipEstim
return SDValue();
}
+bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+ // Note: This functionality is used only when unsafe-fp-math is enabled, and
+ // on cores with reciprocal estimates (which are used when unsafe-fp-math is
+ // enabled for division), this functionality is redundant with the default
+ // combiner logic (once the division -> reciprocal/multiply transformation
+ // has taken place). As a result, this matters more for older cores than for
+ // newer ones.
+
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal if there are two or more FDIVs (for embedded cores with only
+ // one FP pipeline) or three or more FDIVs (for generic OOO cores).
+ switch (Subtarget.getDarwinDirective()) {
+ default:
+ return NumUsers > 2;
+ case PPC::DIR_440:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ return NumUsers > 1;
+ }
+}
+
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
unsigned Bytes, int Dist,
SelectionDAG &DAG) {
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=222710&r1=222709&r2=222710&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Mon Nov 24 17:45:21 2014
@@ -704,6 +704,7 @@ namespace llvm {
bool &UseOneConstNR) const override;
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const override;
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
Added: llvm/trunk/test/CodeGen/PowerPC/fdiv-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fdiv-combine.ll?rev=222710&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fdiv-combine.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/fdiv-combine.ll Mon Nov 24 17:45:21 2014
@@ -0,0 +1,39 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Following test case checks:
+; a / D; b / D; c / D;
+; =>
+; recip = 1.0 / D; a * recip; b * recip; c * recip;
+
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+ %div = fdiv double %a, %D
+ %div1 = fdiv double %b, %D
+ %div2 = fdiv double %c, %D
+ tail call void @foo_3d(double %div, double %div1, double %div2)
+ ret void
+}
+
+define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+; CHECK-LABEL: two_fdiv_double:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+ %div = fdiv double %a, %D
+ %div1 = fdiv double %b, %D
+ tail call void @foo_2d(double %div, double %div1)
+ ret void
+}
+
+declare void @foo_3d(double, double, double)
+declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
+declare void @foo_2d(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }
More information about the llvm-commits
mailing list