[llvm-commits] [llvm] r158956 - in /llvm/trunk: include/llvm/Target/TargetOptions.h lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/Target/ARM/ARMInstrInfo.td test/CodeGen/ARM/fusedMAC.ll test/CodeGen/PowerPC/a2-fp-basic.ll test/CodeGen/PowerPC/fma.ll test/CodeGen/PowerPC/ppc440-fp-basic.ll tools/llc/llc.cpp
Hal Finkel
hfinkel at anl.gov
Thu Jun 21 18:47:10 PDT 2012
On Fri, 22 Jun 2012 01:09:10 -0000
Lang Hames <lhames at gmail.com> wrote:
> Author: lhames
> Date: Thu Jun 21 20:09:09 2012
> New Revision: 158956
>
> URL: http://llvm.org/viewvc/llvm-project?rev=158956&view=rev
> Log:
> Rename -allow-excess-fp-precision flag to -fuse-fp-ops, and switch
> from a boolean flag to an enum: { Fast, Standard, Strict } (default =
> Standard).
>
> This option controls the creation by optimizations of fused FP ops
> that store intermediate results in higher precision than IEEE allows
> (E.g. FMAs). The behavior of this option is intended to match the
> behavior specified by a soon-to-be-introduced frontend flag:
> '-ffuse-fp-ops'.
>
> Fast mode - allows formation of fused FP ops whenever they're
> profitable.
>
> Standard mode - allow fusion only for 'blessed' FP ops. At present
> the only blessed op is the fmuladd intrinsic. In the future more
> blessed ops may be added.
>
> Strict mode - allow fusion only if/when it can be proven that the
> excess precision won't affect the result.
Why are we exposing the standard vs. strict choice at the LLVM level? I
thought this makes sense only at the C-language level (as it relates to
C statement boundaries). As far as LLVM is concerned, either it can
form FMAs freely or it can't. If there is not a good reason to push
these choices directly to the LLVM level, we may just want to keep this
a boolean option and leave the standard vs. strict choice to clang.
>
> Note: This option only controls formation of fused ops by the
> optimizers. Fused operations that are explicitly requested (e.g. FMA
> via the llvm.fma.* intrinsic) will always be honored, regardless of
> the value of this option.
This seems to contradict the code below, where you only select the
ISD::FMA node if the mode is not strict.
-Hal
>
> Internally TargetOptions::AllowExcessFPPrecision has been replaced by
> TargetOptions::AllowFPOpFusion.
>
>
> Modified:
> llvm/trunk/include/llvm/Target/TargetOptions.h
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
> llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll
> llvm/trunk/test/CodeGen/PowerPC/fma.ll
> llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll
> llvm/trunk/tools/llc/llc.cpp
>
> Modified: llvm/trunk/include/llvm/Target/TargetOptions.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetOptions.h?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetOptions.h (original) +++
> llvm/trunk/include/llvm/Target/TargetOptions.h Thu Jun 21 20:09:09
> 2012 @@ -30,12 +30,20 @@ };
> }
>
> + namespace FPOpFusion {
> + enum FPOpFusionMode {
> + Fast, // Enable fusion of FP ops wherever it's profitable.
> + Standard, // Only allow fusion of 'blessed' ops (currently
> just fmuladd).
> + Strict // Never fuse FP-ops.
> + };
> + }
> +
> class TargetOptions {
> public:
> TargetOptions()
> : PrintMachineCode(false), NoFramePointerElim(false),
> NoFramePointerElimNonLeaf(false),
> LessPreciseFPMADOption(false),
> - AllowExcessFPPrecision(false), UnsafeFPMath(false),
> NoInfsFPMath(false),
> + UnsafeFPMath(false), NoInfsFPMath(false),
> NoNaNsFPMath(false),
> HonorSignDependentRoundingFPMathOption(false), UseSoftFloat(false),
> NoZerosInBSS(false), JITExceptionHandling(false),
> JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false), @@ -43,7
> +51,8 @@ StackAlignmentOverride(0), RealignStack(true),
> DisableJumpTables(false), EnableFastISel(false),
> PositionIndependentExecutable(false),
> EnableSegmentedStacks(false),
> - UseInitArray(false), TrapFuncName(""),
> FloatABIType(FloatABI::Default)
> + UseInitArray(false), TrapFuncName(""),
> FloatABIType(FloatABI::Default),
> + AllowFPOpFusion(FPOpFusion::Standard)
> {}
>
> /// PrintMachineCode - This flag is enabled when the
> -print-machineinstrs @@ -74,14 +83,6 @@
> unsigned LessPreciseFPMADOption : 1;
> bool LessPreciseFPMAD() const;
>
> - /// AllowExcessFPPrecision - This flag is enabled when the
> - /// -enable-excess-fp-precision flag is specified on the command
> line. This
> - /// flag is OFF by default. When it is turned on, the code
> generator is
> - /// allowed to produce results that are "more precise" than IEEE
> allows.
> - /// This includes use of FMA-like operations and use of the X86
> FP registers
> - /// without rounding all over the place.
> - unsigned AllowExcessFPPrecision : 1;
> -
> /// UnsafeFPMath - This flag is enabled when the
> /// -enable-unsafe-fp-math flag is specified on the command
> line. When /// this flag is off (the default), the code generator is
> not allowed to @@ -189,6 +190,25 @@
> /// Such a combination is unfortunately popular (e.g.
> arm-apple-darwin). /// Hard presumes that the normal FP ABI is used.
> FloatABI::ABIType FloatABIType;
> +
> + /// AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx
> option.
> + /// This controls the creation of fused FP ops that store
> intermediate
> + /// results in higher precision than IEEE allows (E.g. FMAs).
> + ///
> + /// Fast mode - allows formation of fused FP ops whenever they're
> + /// profitable.
> + /// Standard mode - allow fusion only for 'blessed' FP ops. At
> present the
> + /// only blessed op is the fmuladd intrinsic. In the future more
> blessed ops
> + /// may be added.
> + /// Strict mode - allow fusion only if/when it can be proven
> that the excess
> + /// precision won't effect the result.
> + ///
> + /// Note: This option only controls formation of fused ops by
> the optimizers.
> + /// Fused operations that are explicitly specified (e.g. FMA via
> the
> + /// llvm.fma.* intrinsic) will always be honored, regardless of
> the value of
> + /// this option.
> + FPOpFusion::FPOpFusionMode AllowFPOpFusion;
> +
> };
> } // End llvm namespace
>
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jun 21
> 20:09:09 2012 @@ -5644,7 +5644,7 @@ N0.getOperand(1), N1));
>
> // FADD -> FMA combines:
> - if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
> + if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
> DAG.getTarget().Options.UnsafeFPMath) &&
> DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT)
> && TLI.isOperationLegal(ISD::FMA, VT)) {
> @@ -5721,7 +5721,7 @@
> }
>
> // FSUB -> FMA combines:
> - if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
> + if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
> DAG.getTarget().Options.UnsafeFPMath) &&
> DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT)
> && TLI.isOperationLegal(ISD::FMA, VT)) {
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> (original) +++
> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Thu Jun
> 21 20:09:09 2012 @@ -4934,7 +4934,9 @@ return 0; case
> Intrinsic::fmuladd: { EVT VT = TLI.getValueType(I.getType());
> - if (TLI.isOperationLegal(ISD::FMA, VT) &&
> TLI.isFMAFasterThanMulAndAdd(VT)){
> + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
> + TLI.isOperationLegal(ISD::FMA, VT) &&
> + TLI.isFMAFasterThanMulAndAdd(VT)){
> setValue(&I, DAG.getNode(ISD::FMA, dl,
> getValue(I.getArgOperand(0)).getValueType(),
> getValue(I.getArgOperand(0)),
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++
> llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Thu Jun 21 20:09:09 2012 @@
> -236,7 +236,8 @@ // Prefer fused MAC for fp mul + add over fp VMLA /
> VMLS if they are available. // But only select them if more precision
> in FP computation is allowed. // Do not use them for Darwin platforms.
> -def UseFusedMAC : Predicate<"TM.Options.AllowExcessFPPrecision
> && " +def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion
> =="
> + " FPOpFusion::Fast) && "
> "!Subtarget->isTargetDarwin()">;
> def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
> "Subtarget->isTargetDarwin()">;
>
> Modified: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll (original) +++
> llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Thu Jun 21 20:09:09 2012 @@
> -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4
> -enable-excess-fp-precision | FileCheck %s +; RUN: llc < %s
> -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fuse-fp-ops=fast | FileCheck
> %s ; Check generated fused MAC and MLS.
> define double @fusedMACTest1(double %d1, double %d2, double %d3) {
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll (original) +++
> llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll Thu Jun 21 20:09:09
> 2012 @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc64 -mcpu=a2
> -enable-excess-fp-precision | FileCheck %s +; RUN: llc < %s
> -march=ppc64 -mcpu=a2 -fuse-fp-ops=fast | FileCheck %s
> %0 = type { double, double }
>
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/fma.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma.ll?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/fma.ll (original) +++
> llvm/trunk/test/CodeGen/PowerPC/fma.ll Thu Jun 21 20:09:09 2012 @@
> -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32
> -enable-excess-fp-precision | \ +; RUN: llc < %s -march=ppc32
> -fuse-fp-ops=fast | \ ; RUN: egrep {fn?madd|fn?msub} | count 8
>
> define double @test_FMADD1(double %A, double %B, double %C) {
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll (original) +++
> llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll Thu Jun 21
> 20:09:09 2012 @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 -mcpu=440
> -enable-excess-fp-precision | FileCheck %s +; RUN: llc < %s
> -march=ppc32 -mcpu=440 -fuse-fp-ops=fast | FileCheck %s
> %0 = type { double, double }
>
>
> Modified: llvm/trunk/tools/llc/llc.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llc/llc.cpp?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llc/llc.cpp (original) +++
> llvm/trunk/tools/llc/llc.cpp Thu Jun 21 20:09:09 2012 @@ -156,11
> +156,6 @@ cl::init(false));
>
> static cl::opt<bool>
> -EnableExcessPrecision("enable-excess-fp-precision",
> - cl::desc("Enable optimizations that may increase FP precision"),
> - cl::init(false));
> -
> -static cl::opt<bool>
> EnableUnsafeFPMath("enable-unsafe-fp-math",
> cl::desc("Enable optimizations that may decrease FP precision"),
> cl::init(false));
> @@ -199,6 +194,19 @@
> "Hard float ABI (uses FP registers)"),
> clEnumValEnd));
>
> +static cl::opt<llvm::FPOpFusion::FPOpFusionMode>
> +FuseFPOps("fuse-fp-ops",
> + cl::desc("Enable aggresive formation of fused FP ops"),
> + cl::init(FPOpFusion::Standard),
> + cl::values(
> + clEnumValN(FPOpFusion::Fast, "fast",
> + "Fuse FP ops whenever profitable"),
> + clEnumValN(FPOpFusion::Standard, "standard",
> + "Only fuse 'blessed' FP ops."),
> + clEnumValN(FPOpFusion::Strict, "strict",
> + "Only fuse FP ops when the result won't be
> effected."),
> + clEnumValEnd));
> +
> static cl::opt<bool>
> DontPlaceZerosInBSS("nozero-initialized-in-bss",
> cl::desc("Don't place zero-initialized symbols into bss section"),
> @@ -404,7 +412,7 @@
> Options.LessPreciseFPMADOption = EnableFPMAD;
> Options.NoFramePointerElim = DisableFPElim;
> Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
> - Options.AllowExcessFPPrecision = EnableExcessPrecision;
> + Options.AllowFPOpFusion = FuseFPOps;
> Options.UnsafeFPMath = EnableUnsafeFPMath;
> Options.NoInfsFPMath = EnableNoInfsFPMath;
> Options.NoNaNsFPMath = EnableNoNaNsFPMath;
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
--
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory
More information about the llvm-commits
mailing list