[PATCH v2 1/1] R600: Limit FMA to EG+ with FP64 hw.
Tom Stellard
tom at stellard.net
Tue Oct 14 10:10:41 PDT 2014
On Mon, Oct 13, 2014 at 11:10:05AM -0400, Jan Vesely wrote:
> v2: fixup nested predicates
>
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
> lib/Target/R600/AMDGPUISelLowering.cpp | 5 +++++
> lib/Target/R600/AMDGPUInstructions.td | 1 +
> lib/Target/R600/AMDGPUSubtarget.h | 4 ++++
> lib/Target/R600/EvergreenInstructions.td | 16 ++++++++++------
> 4 files changed, 20 insertions(+), 6 deletions(-)
We need to add a test case for at least one of the non-FP64 EG/NI
cards to verify that FMA is not emitted on hardware without FP64 support.
>
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 6fd4317..b03ec72 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -244,6 +244,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
> }
>
> + if (!Subtarget->hasFMA()) {
> + setOperationAction(ISD::FMA, MVT::f32, Expand);
> + setOperationAction(ISD::FMA, MVT::f64, Expand);
> + }
> +
> setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
>
> setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
> diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> index a608627..e1dec7e 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -34,6 +34,7 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
>
> }
>
> +def HWFP64 : Predicate<"Subtarget.hasHWFP64()">;
> def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
> def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
> def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
> diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
> index 55a0c58..2bba6e0 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.h
> +++ b/lib/Target/R600/AMDGPUSubtarget.h
> @@ -169,6 +169,10 @@ public:
> return (getGeneration() >= EVERGREEN);
> }
>
> + bool hasFMA() const {
> + return (getGeneration() >= EVERGREEN) && hasHWFP64();
> + }
> +
> bool IsIRStructurizerEnabled() const {
> return EnableIRStructurizer;
> }
> diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
> index 8117b60..92e37cd 100644
> --- a/lib/Target/R600/EvergreenInstructions.td
> +++ b/lib/Target/R600/EvergreenInstructions.td
> @@ -257,11 +257,16 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
>
> let Predicates = [isEGorCayman] in {
>
> -// Should be predicated on FeatureFP64
> -// def FMA_64 : R600_3OP <
> -// 0xA, "FMA_64",
> -// [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
> -// >;
> +let Predicates = [HWFP64,isEGorCayman] in {
> +
> +//def FMA_64 : R600_3OP <
> +// 0xA, "FMA_64",
> +// [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
> +//>;
> +
> +def FMA_eg : FMA_Common<0x7>;
> +
> +}
>
> // BFE_UINT - bit_extract, an optimization for mask and shift
> // Src0 = Input
> @@ -319,7 +324,6 @@ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
> def : ROTRPattern <BIT_ALIGN_INT_eg>;
> def MULADD_eg : MULADD_Common<0x14>;
> def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
> -def FMA_eg : FMA_Common<0x7>;
> def ASHR_eg : ASHR_Common<0x15>;
> def LSHR_eg : LSHR_Common<0x16>;
> def LSHL_eg : LSHL_Common<0x17>;
> --
> 1.9.3
>
More information about the llvm-commits mailing list