[llvm-commits] [llvm] r158956 - in /llvm/trunk: include/llvm/Target/TargetOptions.h lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/Target/ARM/ARMInstrInfo.td test/CodeGen/ARM/fusedMAC.ll test/CodeGen/PowerPC/a2-fp-basic.ll test/CodeGen/PowerPC/fma.ll test/CodeGen/PowerPC/ppc440-fp-basic.ll tools/llc/llc.cpp

Thu Jun 21 18:47:10 PDT 2012

On Fri, 22 Jun 2012 01:09:10 -0000
Lang Hames <lhames at gmail.com> wrote:

> Author: lhames
> Date: Thu Jun 21 20:09:09 2012
> New Revision: 158956
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=158956&view=rev
> Log:
> Rename -allow-excess-fp-precision flag to -fuse-fp-ops, and switch
> from a boolean flag to an enum: { Fast, Standard, Strict } (default =
> Standard).
> 
> This option controls the creation by optimizations of fused FP ops
> that store intermediate results in higher precision than IEEE allows
> (E.g. FMAs). The behavior of this option is intended to match the
> behaviour specified by a soon-to-be-introduced frontend flag:
> '-ffuse-fp-ops'.
> 
> Fast mode - allows formation of fused FP ops whenever they're
> profitable.
> 
> Standard mode - allow fusion only for 'blessed' FP ops. At present
> the only blessed op is the fmuladd intrinsic. In the future more
> blessed ops may be added.
> 
> Strict mode - allow fusion only if/when it can be proven that the
> excess precision won't affect the result.

Why are we exposing the standard vs. strict choice at the LLVM level? I
thought this distinction made sense only at the C-language level (as it
relates to C statement boundaries). As far as LLVM is concerned, either it
can form FMAs freely or it can't. Unless there is a good reason to push
these choices directly to the LLVM level, we may just want to keep this
a boolean option and leave the standard vs. strict choice to clang.

> 
> Note: This option only controls formation of fused ops by the
> optimizers.  Fused operations that are explicitly requested (e.g. FMA
> via the llvm.fma.* intrinsic) will always be honored, regardless of
> the value of this option.

This seems to contradict the code below (the Intrinsic::fmuladd case in
SelectionDAGBuilder.cpp), where you only select the ISD::FMA node if the
mode is not strict.

 -Hal

> 
> Internally TargetOptions::AllowExcessFPPrecision has been replaced by
> TargetOptions::AllowFPOpFusion.
> 
> 
> Modified:
>     llvm/trunk/include/llvm/Target/TargetOptions.h
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>     llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>     llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
>     llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
>     llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll
>     llvm/trunk/test/CodeGen/PowerPC/fma.ll
>     llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll
>     llvm/trunk/tools/llc/llc.cpp
> 
> Modified: llvm/trunk/include/llvm/Target/TargetOptions.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetOptions.h?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetOptions.h (original) +++
> llvm/trunk/include/llvm/Target/TargetOptions.h Thu Jun 21 20:09:09
> 2012 @@ -30,12 +30,20 @@ };
>    }
>  
> +  namespace FPOpFusion {
> +    enum FPOpFusionMode {
> +      Fast,     // Enable fusion of FP ops wherever it's profitable.
> +      Standard, // Only allow fusion of 'blessed' ops (currently
> just fmuladd).
> +      Strict    // Never fuse FP-ops.
> +    };
> +  }
> +
>    class TargetOptions {
>    public:
>      TargetOptions()
>          : PrintMachineCode(false), NoFramePointerElim(false),
>            NoFramePointerElimNonLeaf(false),
> LessPreciseFPMADOption(false),
> -          AllowExcessFPPrecision(false), UnsafeFPMath(false),
> NoInfsFPMath(false),
> +          UnsafeFPMath(false), NoInfsFPMath(false),
>            NoNaNsFPMath(false),
> HonorSignDependentRoundingFPMathOption(false), UseSoftFloat(false),
> NoZerosInBSS(false), JITExceptionHandling(false),
> JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false), @@ -43,7
> +51,8 @@ StackAlignmentOverride(0), RealignStack(true),
>            DisableJumpTables(false), EnableFastISel(false),
>            PositionIndependentExecutable(false),
> EnableSegmentedStacks(false),
> -          UseInitArray(false), TrapFuncName(""),
> FloatABIType(FloatABI::Default)
> +          UseInitArray(false), TrapFuncName(""),
> FloatABIType(FloatABI::Default),
> +          AllowFPOpFusion(FPOpFusion::Standard)
>      {}
>  
>      /// PrintMachineCode - This flag is enabled when the
> -print-machineinstrs @@ -74,14 +83,6 @@
>      unsigned LessPreciseFPMADOption : 1;
>      bool LessPreciseFPMAD() const;
>  
> -    /// AllowExcessFPPrecision - This flag is enabled when the
> -    /// -enable-excess-fp-precision flag is specified on the command
> line. This
> -    /// flag is OFF by default. When it is turned on, the code
> generator is
> -    /// allowed to produce results that are "more precise" than IEEE
> allows.
> -    /// This includes use of FMA-like operations and use of the X86
> FP registers
> -    /// without rounding all over the place.
> -    unsigned AllowExcessFPPrecision : 1;
> -
>      /// UnsafeFPMath - This flag is enabled when the
>      /// -enable-unsafe-fp-math flag is specified on the command
> line.  When /// this flag is off (the default), the code generator is
> not allowed to @@ -189,6 +190,25 @@
>      /// Such a combination is unfortunately popular (e.g.
> arm-apple-darwin). /// Hard presumes that the normal FP ABI is used.
>      FloatABI::ABIType FloatABIType;
> +
> +    /// AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx
> option.
> +    /// This controls the creation of fused FP ops that store
> intermediate
> +    /// results in higher precision than IEEE allows (E.g. FMAs).
> +    ///
> +    /// Fast mode - allows formation of fused FP ops whenever they're
> +    /// profitable.
> +    /// Standard mode - allow fusion only for 'blessed' FP ops. At
> present the
> +    /// only blessed op is the fmuladd intrinsic. In the future more
> blessed ops
> +    /// may be added.
> +    /// Strict mode - allow fusion only if/when it can be proven
> that the excess
> +    /// precision won't effect the result.
> +    ///
> +    /// Note: This option only controls formation of fused ops by
> the optimizers.
> +    /// Fused operations that are explicitly specified (e.g. FMA via
> the
> +    /// llvm.fma.* intrinsic) will always be honored, regardless of
> the value of
> +    /// this option.
> +    FPOpFusion::FPOpFusionMode AllowFPOpFusion;
> +
>    };
>  } // End llvm namespace
>  
> 
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jun 21
> 20:09:09 2012 @@ -5644,7 +5644,7 @@ N0.getOperand(1), N1));
>  
>    // FADD -> FMA combines:
> -  if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
> +  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
>         DAG.getTarget().Options.UnsafeFPMath) &&
>        DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT)
> && TLI.isOperationLegal(ISD::FMA, VT)) {
> @@ -5721,7 +5721,7 @@
>    }
>  
>    // FSUB -> FMA combines:
> -  if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
> +  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
>         DAG.getTarget().Options.UnsafeFPMath) &&
>        DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT)
> && TLI.isOperationLegal(ISD::FMA, VT)) {
> 
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> (original) +++
> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Thu Jun
> 21 20:09:09 2012 @@ -4934,7 +4934,9 @@ return 0; case
> Intrinsic::fmuladd: { EVT VT = TLI.getValueType(I.getType());
> -    if (TLI.isOperationLegal(ISD::FMA, VT) &&
> TLI.isFMAFasterThanMulAndAdd(VT)){
> +    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
> +        TLI.isOperationLegal(ISD::FMA, VT) &&
> +        TLI.isFMAFasterThanMulAndAdd(VT)){
>        setValue(&I, DAG.getNode(ISD::FMA, dl,
>                                 getValue(I.getArgOperand(0)).getValueType(),
>                                 getValue(I.getArgOperand(0)),
> 
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++
> llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Thu Jun 21 20:09:09 2012 @@
> -236,7 +236,8 @@ // Prefer fused MAC for fp mul + add over fp VMLA /
> VMLS if they are available. // But only select them if more precision
> in FP computation is allowed. // Do not use them for Darwin platforms.
> -def UseFusedMAC      : Predicate<"TM.Options.AllowExcessFPPrecision
> && " +def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion
> =="
> +                                 " FPOpFusion::Fast) && "
>                                   "!Subtarget->isTargetDarwin()">;
>  def DontUseFusedMAC  : Predicate<"!Subtarget->hasVFP4() || "
>                                   "Subtarget->isTargetDarwin()">;
> 
> Modified: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll (original) +++
> llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Thu Jun 21 20:09:09 2012 @@
> -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4
> -enable-excess-fp-precision | FileCheck %s +; RUN: llc < %s
> -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fuse-fp-ops=fast | FileCheck
> %s ; Check generated fused MAC and MLS. 
>  define double @fusedMACTest1(double %d1, double %d2, double %d3) {
> 
> Modified: llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll (original) +++
> llvm/trunk/test/CodeGen/PowerPC/a2-fp-basic.ll Thu Jun 21 20:09:09
> 2012 @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc64 -mcpu=a2
> -enable-excess-fp-precision | FileCheck %s +; RUN: llc < %s
> -march=ppc64 -mcpu=a2 -fuse-fp-ops=fast | FileCheck %s 
>  %0 = type { double, double }
>  
> 
> Modified: llvm/trunk/test/CodeGen/PowerPC/fma.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma.ll?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/fma.ll (original) +++
> llvm/trunk/test/CodeGen/PowerPC/fma.ll Thu Jun 21 20:09:09 2012 @@
> -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32
> -enable-excess-fp-precision | \ +; RUN: llc < %s -march=ppc32
> -fuse-fp-ops=fast | \ ; RUN:   egrep {fn?madd|fn?msub} | count 8
>  
>  define double @test_FMADD1(double %A, double %B, double %C) {
> 
> Modified: llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll (original) +++
> llvm/trunk/test/CodeGen/PowerPC/ppc440-fp-basic.ll Thu Jun 21
> 20:09:09 2012 @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 -mcpu=440
> -enable-excess-fp-precision | FileCheck %s +; RUN: llc < %s
> -march=ppc32 -mcpu=440 -fuse-fp-ops=fast | FileCheck %s 
>  %0 = type { double, double }
>  
> 
> Modified: llvm/trunk/tools/llc/llc.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llc/llc.cpp?rev=158956&r1=158955&r2=158956&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llc/llc.cpp (original) +++
> llvm/trunk/tools/llc/llc.cpp Thu Jun 21 20:09:09 2012 @@ -156,11
> +156,6 @@ cl::init(false));
>  
>  static cl::opt<bool>
> -EnableExcessPrecision("enable-excess-fp-precision",
> -  cl::desc("Enable optimizations that may increase FP precision"),
> -  cl::init(false));
> -
> -static cl::opt<bool>
>  EnableUnsafeFPMath("enable-unsafe-fp-math",
>    cl::desc("Enable optimizations that may decrease FP precision"),
>    cl::init(false));
> @@ -199,6 +194,19 @@
>                 "Hard float ABI (uses FP registers)"),
>      clEnumValEnd));
>  
> +static cl::opt<llvm::FPOpFusion::FPOpFusionMode>
> +FuseFPOps("fuse-fp-ops",
> +  cl::desc("Enable aggresive formation of fused FP ops"),
> +  cl::init(FPOpFusion::Standard),
> +  cl::values(
> +    clEnumValN(FPOpFusion::Fast, "fast",
> +               "Fuse FP ops whenever profitable"),
> +    clEnumValN(FPOpFusion::Standard, "standard",
> +               "Only fuse 'blessed' FP ops."),
> +    clEnumValN(FPOpFusion::Strict, "strict",
> +               "Only fuse FP ops when the result won't be
> effected."),
> +    clEnumValEnd));
> +
>  static cl::opt<bool>
>  DontPlaceZerosInBSS("nozero-initialized-in-bss",
>    cl::desc("Don't place zero-initialized symbols into bss section"),
> @@ -404,7 +412,7 @@
>    Options.LessPreciseFPMADOption = EnableFPMAD;
>    Options.NoFramePointerElim = DisableFPElim;
>    Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
> -  Options.AllowExcessFPPrecision = EnableExcessPrecision;
> +  Options.AllowFPOpFusion = FuseFPOps;
>    Options.UnsafeFPMath = EnableUnsafeFPMath;
>    Options.NoInfsFPMath = EnableNoInfsFPMath;
>    Options.NoNaNsFPMath = EnableNoNaNsFPMath;
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

-- 
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory