[PATCH v2 1/1] R600: Limit FMA to EG+ with FP64 hw.

Tom Stellard tom at stellard.net
Tue Oct 14 10:10:41 PDT 2014


On Mon, Oct 13, 2014 at 11:10:05AM -0400, Jan Vesely wrote:
> v2: fixup nested predicates
> 
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>  lib/Target/R600/AMDGPUISelLowering.cpp   |  5 +++++
>  lib/Target/R600/AMDGPUInstructions.td    |  1 +
>  lib/Target/R600/AMDGPUSubtarget.h        |  4 ++++
>  lib/Target/R600/EvergreenInstructions.td | 16 ++++++++++------
>  4 files changed, 20 insertions(+), 6 deletions(-)

We need to add a test case for at least one of the non-fp64 EG/NI
cards to make sure FMA is not emitted.

> 
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 6fd4317..b03ec72 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -244,6 +244,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
>      setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
>    }
>  
> +  if (!Subtarget->hasFMA()) {
> +    setOperationAction(ISD::FMA, MVT::f32, Expand);
> +    setOperationAction(ISD::FMA, MVT::f64, Expand);
> +  }
> +
>    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
>  
>    setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
> diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> index a608627..e1dec7e 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -34,6 +34,7 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
>  
>  }
>  
> +def HWFP64 : Predicate<"Subtarget.hasHWFP64()">;
>  def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
>  def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
>  def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
> diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
> index 55a0c58..2bba6e0 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.h
> +++ b/lib/Target/R600/AMDGPUSubtarget.h
> @@ -169,6 +169,10 @@ public:
>      return (getGeneration() >= EVERGREEN);
>    }
>  
> +  bool hasFMA() const {
> +    return (getGeneration() >= EVERGREEN) && hasHWFP64();
> +  }
> +
>    bool IsIRStructurizerEnabled() const {
>      return EnableIRStructurizer;
>    }
> diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
> index 8117b60..92e37cd 100644
> --- a/lib/Target/R600/EvergreenInstructions.td
> +++ b/lib/Target/R600/EvergreenInstructions.td
> @@ -257,11 +257,16 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
>  
>  let Predicates = [isEGorCayman] in {
>  
> -// Should be predicated on FeatureFP64
> -// def FMA_64 : R600_3OP <
> -//   0xA, "FMA_64",
> -//   [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
> -// >;
> +let Predicates = [HWFP64,isEGorCayman] in {
> +
> +//def FMA_64 : R600_3OP <
> +//  0xA, "FMA_64",
> +//  [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
> +//>;
> +
> +def FMA_eg : FMA_Common<0x7>;
> +
> +}
>  
>  // BFE_UINT - bit_extract, an optimization for mask and shift
>  // Src0 = Input
> @@ -319,7 +324,6 @@ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
>  def : ROTRPattern <BIT_ALIGN_INT_eg>;
>  def MULADD_eg : MULADD_Common<0x14>;
>  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
> -def FMA_eg : FMA_Common<0x7>;
>  def ASHR_eg : ASHR_Common<0x15>;
>  def LSHR_eg : LSHR_Common<0x16>;
>  def LSHL_eg : LSHL_Common<0x17>;
> -- 
> 1.9.3
> 



More information about the llvm-commits mailing list