[PATCH] R600/SI: Add FP mode bits to binary.

Thu Jun 26 06:42:46 PDT 2014

On Thu, Jun 19, 2014 at 06:24:31AM +0000, Matt Arsenault wrote:
> The default rounding mode to initialize the mode register needs
> to be reported to the runtime. Fill in other bits a kernel
> may be interested in setting for future use.
> 

LGTM.

> http://reviews.llvm.org/D4205
> 
> Files:
>   lib/Target/R600/AMDGPUAsmPrinter.cpp
>   lib/Target/R600/AMDGPUAsmPrinter.h
>   lib/Target/R600/SIDefines.h
>   test/CodeGen/R600/default-fp-mode.ll

> Index: lib/Target/R600/AMDGPUAsmPrinter.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -36,6 +36,22 @@
>  
>  using namespace llvm;
>  
> +// TODO: This should get the default rounding mode from the kernel. We just set
> +// the default here, but this could change if the OpenCL rounding mode pragmas
> +// are used.
> +//
> +// The denormal mode here should match what is reported by the OpenCL runtime
> +// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG. AMD
> +// OpenCL only sets flush none and reports CL_FP_DENORM for double precision,
> +// and leaves single precision to flush all and does not report CL_FP_DENORM for
> +// CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports CL_FP_DENORM for
> +// both.
> +static uint32_t getFPMode(MachineFunction &) {
> +  return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
> +         FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
> +         FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
> +         FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
> +}
>  
>  static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
>                                                MCStreamer &Streamer) {
> @@ -93,6 +109,10 @@
>                                   false);
>        OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
>                                   false);
> +      OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
> +                                 false);
> +      OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
> +                                 false);
>      } else {
>        R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
>        OutStreamer.emitRawComment(
> @@ -280,16 +300,27 @@
>    if (VCCUsed)
>      MaxSGPR += 2;
>  
> -  ProgInfo.CodeLen = CodeSize;
> -  ProgInfo.NumSGPR = MaxSGPR;
>    ProgInfo.NumVGPR = MaxVGPR;
> +  ProgInfo.NumSGPR = MaxSGPR;
> +
> +  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
> +  // register.
> +  ProgInfo.FloatMode = getFPMode(MF);
> +
> +  // XXX: Not quite sure what this does, but sc seems to unset this.
> +  ProgInfo.IEEEMode = 0;
> +
> +  // Do not clamp NAN to 0.
> +  ProgInfo.DX10Clamp = 0;
> +
> +  ProgInfo.CodeLen = CodeSize;
>  }
>  
>  void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
>                                           const SIProgramInfo &KernelInfo) {
>    const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
> -
>    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> +
>    unsigned RsrcReg;
>    switch (MFI->ShaderType) {
>    default: // Fall through
> @@ -299,25 +330,41 @@
>    case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
>    }
>  
> -  OutStreamer.EmitIntValue(RsrcReg, 4);
> -  OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
> -                           S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
> -
>    unsigned LDSAlignShift;
>    if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
> -    // LDS is allocated in 64 dword blocks
> +    // LDS is allocated in 64 dword blocks.
>      LDSAlignShift = 8;
>    } else {
> -    // LDS is allocated in 128 dword blocks
> +    // LDS is allocated in 128 dword blocks.
>      LDSAlignShift = 9;
>    }
> +
>    unsigned LDSBlocks =
> -          RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
> +    RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
>  
>    if (MFI->ShaderType == ShaderType::COMPUTE) {
> +    OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
> +
> +    const uint32_t ComputePGMRSrc1 =
> +      S_00B848_VGPRS(KernelInfo.NumVGPR / 4) |
> +      S_00B848_SGPRS(KernelInfo.NumSGPR / 8) |
> +      S_00B848_PRIORITY(KernelInfo.Priority) |
> +      S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
> +      S_00B848_PRIV(KernelInfo.Priv) |
> +      S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
> +      S_00B848_DEBUG_MODE(KernelInfo.DebugMode) |
> +      S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
> +
> +    OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
> +
>      OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
>      OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
> +  } else {
> +    OutStreamer.EmitIntValue(RsrcReg, 4);
> +    OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
> +                             S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
>    }
> +
>    if (MFI->ShaderType == ShaderType::PIXEL) {
>      OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
>      OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
> Index: lib/Target/R600/AMDGPUAsmPrinter.h
> ===================================================================
> --- lib/Target/R600/AMDGPUAsmPrinter.h
> +++ lib/Target/R600/AMDGPUAsmPrinter.h
> @@ -25,13 +25,28 @@
>  private:
>    struct SIProgramInfo {
>      SIProgramInfo() :
> -      CodeLen(0),
> +      NumVGPR(0),
>        NumSGPR(0),
> -      NumVGPR(0) {}
> +      Priority(0),
> +      FloatMode(0),
> +      Priv(0),
> +      DX10Clamp(0),
> +      DebugMode(0),
> +      IEEEMode(0),
> +      CodeLen(0) {}
>  
> +    // Fields set in PGM_RSRC1 pm4 packet.
> +    uint32_t NumVGPR;
> +    uint32_t NumSGPR;
> +    uint32_t Priority;
> +    uint32_t FloatMode;
> +    uint32_t Priv;
> +    uint32_t DX10Clamp;
> +    uint32_t DebugMode;
> +    uint32_t IEEEMode;
> +
> +    // Bonus information for debugging.
>      uint64_t CodeLen;
> -    unsigned NumSGPR;
> -    unsigned NumVGPR;
>    };
>  
>    void getSIProgramInfo(SIProgramInfo &Out, MachineFunction &MF) const;
> Index: lib/Target/R600/SIDefines.h
> ===================================================================
> --- lib/Target/R600/SIDefines.h
> +++ lib/Target/R600/SIDefines.h
> @@ -35,4 +35,54 @@
>  #define   S_00B84C_LDS_SIZE(x)                                        (((x) & 0x1FF) << 15)
>  #define R_0286CC_SPI_PS_INPUT_ENA                                       0x0286CC
>  
> +
> +#define R_00B848_COMPUTE_PGM_RSRC1                                      0x00B848
> +#define   S_00B848_VGPRS(x)                                           (((x) & 0x3F) << 0)
> +#define   G_00B848_VGPRS(x)                                           (((x) >> 0) & 0x3F)
> +#define   C_00B848_VGPRS                                              0xFFFFFFC0
> +#define   S_00B848_SGPRS(x)                                           (((x) & 0x0F) << 6)
> +#define   G_00B848_SGPRS(x)                                           (((x) >> 6) & 0x0F)
> +#define   C_00B848_SGPRS                                              0xFFFFFC3F
> +#define   S_00B848_PRIORITY(x)                                        (((x) & 0x03) << 10)
> +#define   G_00B848_PRIORITY(x)                                        (((x) >> 10) & 0x03)
> +#define   C_00B848_PRIORITY                                           0xFFFFF3FF
> +#define   S_00B848_FLOAT_MODE(x)                                      (((x) & 0xFF) << 12)
> +#define   G_00B848_FLOAT_MODE(x)                                      (((x) >> 12) & 0xFF)
> +#define   C_00B848_FLOAT_MODE                                         0xFFF00FFF
> +#define   S_00B848_PRIV(x)                                            (((x) & 0x1) << 20)
> +#define   G_00B848_PRIV(x)                                            (((x) >> 20) & 0x1)
> +#define   C_00B848_PRIV                                               0xFFEFFFFF
> +#define   S_00B848_DX10_CLAMP(x)                                      (((x) & 0x1) << 21)
> +#define   G_00B848_DX10_CLAMP(x)                                      (((x) >> 21) & 0x1)
> +#define   C_00B848_DX10_CLAMP                                         0xFFDFFFFF
> +#define   S_00B848_DEBUG_MODE(x)                                      (((x) & 0x1) << 22)
> +#define   G_00B848_DEBUG_MODE(x)                                      (((x) >> 22) & 0x1)
> +#define   C_00B848_DEBUG_MODE                                         0xFFBFFFFF
> +#define   S_00B848_IEEE_MODE(x)                                       (((x) & 0x1) << 23)
> +#define   G_00B848_IEEE_MODE(x)                                       (((x) >> 23) & 0x1)
> +#define   C_00B848_IEEE_MODE                                          0xFF7FFFFF
> +
> +
> +// Helpers for setting FLOAT_MODE
> +#define FP_ROUND_ROUND_TO_NEAREST 0
> +#define FP_ROUND_ROUND_TO_INF 1
> +#define FP_ROUND_ROUND_TO_NEGINF 2
> +#define FP_ROUND_ROUND_TO_ZERO 3
> +
> +// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double
> +// precision.
> +#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
> +#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)
> +
> +#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
> +#define FP_DENORM_FLUSH_OUT 1
> +#define FP_DENORM_FLUSH_IN 2
> +#define FP_DENORM_FLUSH_NONE 3
> +
> +
> +// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double
> +// precision.
> +#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
> +#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
> +
>  #endif // SIDEFINES_H_
> Index: test/CodeGen/R600/default-fp-mode.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/default-fp-mode.ll
> @@ -0,0 +1,10 @@
> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
> +
> +; SI-LABEL: @test_kernel
> +; SI: FloatMode: 240
> +; SI: IeeeMode: 0
> +define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
> +  store float 0.0, float addrspace(1)* %out0
> +  store double 0.0, double addrspace(1)* %out1
> +  ret void
> +}

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits