[PATCH] R600/SI: Add subtarget feature for denormal handling

Mon Jul 14 12:20:42 PDT 2014

On Fri, Jun 20, 2014 at 05:11:14PM +0000, Matt Arsenault wrote:
> This allows toggling whether all denormals are flushed or not for double or single precision. It doesn't provide any way to use the available flush-in-only or flush-out-only modes, but I don't know what real use those have.

LGTM.  Are there also some target flags we should be switching on/off
so that LLVM is aware of how we handle denormals?

-Tom

> 
> http://reviews.llvm.org/D4232
> 
> Files:
>   lib/Target/R600/AMDGPU.td
>   lib/Target/R600/AMDGPUAsmPrinter.cpp
>   lib/Target/R600/AMDGPUInstructions.td
>   lib/Target/R600/AMDGPUSubtarget.cpp
>   lib/Target/R600/AMDGPUSubtarget.h
>   test/CodeGen/R600/default-fp-mode.ll

> Index: lib/Target/R600/AMDGPU.td
> ===================================================================
> --- lib/Target/R600/AMDGPU.td
> +++ lib/Target/R600/AMDGPU.td
> @@ -32,11 +32,22 @@
>          "false",
>          "Disable the if conversion pass">;
>  
> -def FeatureFP64     : SubtargetFeature<"fp64",
> +def FeatureFP64 : SubtargetFeature<"fp64",
>          "FP64",
>          "true",
>          "Enable 64bit double precision operations">;
>  
> +def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
> +        "FP64Denormals",
> +        "true",
> +        "Enable double precision denormal handling",
> +        [FeatureFP64]>;
> +
> +def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
> +        "FP32Denormals",
> +        "true",
> +        "Enable single precision denormal handling">;
> +
>  def Feature64BitPtr : SubtargetFeature<"64BitPtr",
>          "Is64bit",
>          "true",
> Index: lib/Target/R600/AMDGPUAsmPrinter.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -53,11 +53,20 @@
>  // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
>  // and sin_f32, cos_f32 on most parts). We want to be able to use these so it's
>  // probably best to just report no single precision denormals.
> -static uint32_t getFPMode(MachineFunction &) {
> +static uint32_t getFPMode(const MachineFunction &F) {
> +  const AMDGPUSubtarget& ST = F.getTarget().getSubtarget<AMDGPUSubtarget>();
> +  // TODO: Is there any real use for the flush in only / flush out only modes?
> +
> +  uint32_t FP32Denormals =
> +    ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
> +
> +  uint32_t FP64Denormals =
> +    ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
> +
>    return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
>           FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
> -         FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) |
> -         FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
> +         FP_DENORM_MODE_SP(FP32Denormals) |
> +         FP_DENORM_MODE_DP(FP64Denormals);
>  }
>  
>  static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
> Index: lib/Target/R600/AMDGPUInstructions.td
> ===================================================================
> --- lib/Target/R600/AMDGPUInstructions.td
> +++ lib/Target/R600/AMDGPUInstructions.td
> @@ -35,6 +35,8 @@
>  }
>  
>  def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
> +def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
> +def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
>  
>  def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
>  def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
> Index: lib/Target/R600/AMDGPUSubtarget.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUSubtarget.cpp
> +++ lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -15,6 +15,7 @@
>  #include "AMDGPUSubtarget.h"
>  #include "R600InstrInfo.h"
>  #include "SIInstrInfo.h"
> +#include "llvm/ADT/SmallString.h"
>  
>  using namespace llvm;
>  
> @@ -36,17 +37,36 @@
>    TexVTXClauseSize = 0;
>    Gen = AMDGPUSubtarget::R600;
>    FP64 = false;
> +  FP64Denormals = false;
> +  FP32Denormals = false;
>    CaymanISA = false;
>    EnableIRStructurizer = true;
>    EnableIfCvt = true;
>    WavefrontSize = 0;
>    CFALUBug = false;
>    LocalMemorySize = 0;
> -  ParseSubtargetFeatures(GPU, FS);
> +
> +  // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
> +  // enabled, but some instructions do not respect them, so don't enable by
> +  // default.
> +  //
> +  // We want to be able to turn these off, but making this a subtarget feature
> +  // for SI has the unhelpful behavior that it unsets everything else if you
> +  // disable it.
> +  SmallString<128> FullFS("+fp64-denormals,");
> +  FullFS += FS;
> +
> +  ParseSubtargetFeatures(GPU, FullFS);
>    DevName = GPU;
>  
>    if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
>      InstrInfo.reset(new R600InstrInfo(*this));
> +
> +    // FIXME: I don't think Evergreen has any useful support for
> +    // denormals, but this should be checked. Should we issue a warning
> +    // somewhere if someone tries to enable these?
> +    FP32Denormals = false;
> +    FP64Denormals = false;
>    } else {
>      InstrInfo.reset(new SIInstrInfo(*this));
>    }
> Index: lib/Target/R600/AMDGPUSubtarget.h
> ===================================================================
> --- lib/Target/R600/AMDGPUSubtarget.h
> +++ lib/Target/R600/AMDGPUSubtarget.h
> @@ -51,6 +51,8 @@
>    short TexVTXClauseSize;
>    enum Generation Gen;
>    bool FP64;
> +  bool FP64Denormals;
> +  bool FP32Denormals;
>    bool CaymanISA;
>    bool EnableIRStructurizer;
>    bool EnableIfCvt;
> @@ -76,6 +78,14 @@
>    bool hasHWFP64() const;
>    bool hasCaymanISA() const;
>  
> +  bool hasFP32Denormals() const {
> +    return FP32Denormals;
> +  }
> +
> +  bool hasFP64Denormals() const {
> +    return FP64Denormals;
> +  }
> +
>    bool hasBFE() const {
>      return (getGeneration() >= EVERGREEN);
>    }
> Index: test/CodeGen/R600/default-fp-mode.ll
> ===================================================================
> --- test/CodeGen/R600/default-fp-mode.ll
> +++ test/CodeGen/R600/default-fp-mode.ll
> @@ -1,8 +1,27 @@
> -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
> +; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
>  
> -; SI-LABEL: @test_kernel
> -; SI: FloatMode: 192
> -; SI: IeeeMode: 0
> +; FUNC-LABEL: @test_kernel
> +
> +; DEFAULT: FloatMode: 192
> +; DEFAULT: IeeeMode: 0
> +
> +; FP64-DENORMAL: FloatMode: 192
> +; FP64-DENORMAL: IeeeMode: 0
> +
> +; FP32-DENORMAL: FloatMode: 48
> +; FP32-DENORMAL: IeeeMode: 0
> +
> +; BOTH-DENORMAL: FloatMode: 240
> +; BOTH-DENORMAL: IeeeMode: 0
> +
> +; NO-DENORMAL: FloatMode: 0
> +; NO-DENORMAL: IeeeMode: 0
>  define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
>    store float 0.0, float addrspace(1)* %out0
>    store double 0.0, double addrspace(1)* %out1

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits