[llvm-commits] [llvm] r158757 - in /llvm/trunk: include/llvm/Target/TargetOptions.h lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/PowerPC/PPCInstrInfo.td test/CodeGen/ARM/fusedMAC.ll test/CodeGen/PowerPC/a2-fp-basic.ll test/CodeGen/PowerPC/fma.ll test/CodeGen/PowerPC/ppc440-fp-basic.ll tools/llc/llc.cpp
Lang Hames
lhames at gmail.com
Tue Jun 19 15:51:24 PDT 2012
Author: lhames
Date: Tue Jun 19 17:51:23 2012
New Revision: 158757
URL: http://llvm.org/viewvc/llvm-project?rev=158757&view=rev
Log:
Add DAG-combines for aggressive FMA formation.
This patch adds DAG combines to form FMAs from pairs of FADD + FMUL or
FSUB + FMUL. The combines are performed when:
(a) Either
AllowExcessFPPrecision option (-enable-excess-fp-precision for llc)
OR
UnsafeFPMath option (-enable-unsafe-fp-math)
is set, and
(b) TargetLoweringInfo::isFMAFasterThanMulAndAdd(VT) is true for the type of
the FADD/FSUB, and
(c) The FMUL only has one user (the FADD/FSUB).
If your target has fast FMA instructions you can make use of these combines by
overriding TargetLoweringInfo::isFMAFasterThanMulAndAdd(VT) to return true for
types supported by your FMA instruction, and adding patterns to match ISD::FMA
to your FMA instructions.
Modified:
llvm/trunk/include/llvm/Target/TargetOptions.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll
llvm/trunk/test/CodeGen/PowerPC/fma.ll
llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll
llvm/trunk/tools/llc/llc.cpp
Modified: llvm/trunk/include/llvm/Target/TargetOptions.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetOptions.h?rev=158757&r1=158756&r2=158757&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetOptions.h (original)
+++ llvm/trunk/include/llvm/Target/TargetOptions.h Tue Jun 19 17:51:23 2012
@@ -35,7 +35,7 @@
TargetOptions()
: PrintMachineCode(false), NoFramePointerElim(false),
NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false),
- NoExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
+ AllowExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false),
UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),
JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
@@ -74,13 +74,13 @@
unsigned LessPreciseFPMADOption : 1;
bool LessPreciseFPMAD() const;
- /// NoExcessFPPrecision - This flag is enabled when the
- /// -disable-excess-fp-precision flag is specified on the command line.
- /// When this flag is off (the default), the code generator is allowed to
- /// produce results that are "more precise" than IEEE allows. This includes
- /// use of FMA-like operations and use of the X86 FP registers without
- /// rounding all over the place.
- unsigned NoExcessFPPrecision : 1;
+ /// AllowExcessFPPrecision - This flag is enabled when the
+ /// -enable-excess-fp-precision flag is specified on the command line. This
+ /// flag is OFF by default. When it is turned on, the code generator is
+ /// allowed to produce results that are "more precise" than IEEE allows.
+ /// This includes use of FMA-like operations and use of the X86 FP registers
+ /// without rounding all over the place.
+ unsigned AllowExcessFPPrecision : 1;
/// UnsafeFPMath - This flag is enabled when the
/// -enable-unsafe-fp-math flag is specified on the command line. When
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=158757&r1=158756&r2=158757&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jun 19 17:51:23 2012
@@ -5633,6 +5633,26 @@
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+ // FADD -> FMA combines:
+ if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegal(ISD::FMA, VT)) {
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+ }
+
return SDValue();
}
@@ -5690,6 +5710,29 @@
}
}
+ // FSUB -> FMA combines:
+ if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegal(ISD::FMA, VT)) {
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+ }
+ }
+
return SDValue();
}
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=158757&r1=158756&r2=158757&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Tue Jun 19 17:51:23 2012
@@ -236,7 +236,7 @@
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
// Do not use them for Darwin platforms.
-def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && "
+def UseFusedMAC : Predicate<"TM.Options.AllowExcessFPPrecision && "
"!Subtarget->isTargetDarwin()">;
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=158757&r1=158756&r2=158757&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Tue Jun 19 17:51:23 2012
@@ -353,7 +353,7 @@
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
+def FPContractions : Predicate<"TM.Options.AllowExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
Modified: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll?rev=158757&r1=158756&r2=158757&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Tue Jun 19 17:51:23 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -enable-excess-fp-precision | FileCheck %s
; Check generated fused MAC and MLS.
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
Modified: llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll?rev=158757&r1=158756&r2=158757&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll Tue Jun 19 17:51:23 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 -enable-excess-fp-precision | FileCheck %s
%0 = type { double, double }
Modified: llvm/trunk/test/CodeGen/PowerPC/fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma.ll?rev=158757&r1=158756&r2=158757&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fma.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fma.ll Tue Jun 19 17:51:23 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 -enable-excess-fp-precision | \
; RUN: egrep {fn?madd|fn?msub} | count 8
define double @test_FMADD1(double %A, double %B, double %C) {
Modified: llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll?rev=158757&r1=158756&r2=158757&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll Tue Jun 19 17:51:23 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=440 -enable-excess-fp-precision | FileCheck %s
%0 = type { double, double }
Modified: llvm/trunk/tools/llc/llc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llc/llc.cpp?rev=158757&r1=158756&r2=158757&view=diff
==============================================================================
--- llvm/trunk/tools/llc/llc.cpp (original)
+++ llvm/trunk/tools/llc/llc.cpp Tue Jun 19 17:51:23 2012
@@ -156,8 +156,8 @@
cl::init(false));
static cl::opt<bool>
-DisableExcessPrecision("disable-excess-fp-precision",
- cl::desc("Disable optimizations that may increase FP precision"),
+EnableExcessPrecision("enable-excess-fp-precision",
+ cl::desc("Enable optimizations that may increase FP precision"),
cl::init(false));
static cl::opt<bool>
@@ -404,7 +404,7 @@
Options.LessPreciseFPMADOption = EnableFPMAD;
Options.NoFramePointerElim = DisableFPElim;
Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
- Options.NoExcessFPPrecision = DisableExcessPrecision;
+ Options.AllowExcessFPPrecision = EnableExcessPrecision;
Options.UnsafeFPMath = EnableUnsafeFPMath;
Options.NoInfsFPMath = EnableNoInfsFPMath;
Options.NoNaNsFPMath = EnableNoNaNsFPMath;
More information about the llvm-commits
mailing list