[PATCH] R600: Add FMA instruction

Tom Stellard tom at stellard.net
Mon Jul 14 12:30:45 PDT 2014


On Thu, Jun 26, 2014 at 01:49:53AM +0000, Matt Arsenault wrote:
> http://reviews.llvm.org/D4307
> 
> Files:
>   lib/Target/R600/EvergreenInstructions.td
>   lib/Target/R600/R600Instructions.td
>   test/CodeGen/R600/fma.f64.ll
>   test/CodeGen/R600/fma.ll

The FMA instructions need to be predicated on FeatureFP64, since not all
Evergreen GPUs support doubles.

-Tom

> Index: lib/Target/R600/EvergreenInstructions.td
> ===================================================================
> --- lib/Target/R600/EvergreenInstructions.td
> +++ lib/Target/R600/EvergreenInstructions.td
> @@ -256,6 +256,11 @@
>  
>  let Predicates = [isEGorCayman] in {
>  
> +// def FMA_64 : R600_3OP <
> +//   0xA, "FMA_64",
> +//   [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
> +// >;
> +
>  // BFE_UINT - bit_extract, an optimization for mask and shift
>  // Src0 = Input
>  // Src1 = Offset
> @@ -312,6 +317,7 @@
>  def : ROTRPattern <BIT_ALIGN_INT_eg>;
>  def MULADD_eg : MULADD_Common<0x14>;
>  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
> +def FMA_eg : FMA_Common<0x7>;
>  def ASHR_eg : ASHR_Common<0x15>;
>  def LSHR_eg : LSHR_Common<0x16>;
>  def LSHL_eg : LSHL_Common<0x17>;
> Index: lib/Target/R600/R600Instructions.td
> ===================================================================
> --- lib/Target/R600/R600Instructions.td
> +++ lib/Target/R600/R600Instructions.td
> @@ -915,6 +915,11 @@
>    [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
>  >;
>  
> +class FMA_Common <bits<5> inst> : R600_3OP <
> +  inst, "FMA",
> +  [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
> +>;
> +
>  class CNDE_Common <bits<5> inst> : R600_3OP <
>    inst, "CNDE",
>    [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
> Index: test/CodeGen/R600/fma.f64.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/fma.f64.ll
> @@ -0,0 +1,46 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +
> +declare double @llvm.fma.f64(double, double, double) nounwind readnone
> +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
> +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
> +
> +
> +; FUNC-LABEL: @fma_f64
> +; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
> +define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
> +                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
> +   %r0 = load double addrspace(1)* %in1
> +   %r1 = load double addrspace(1)* %in2
> +   %r2 = load double addrspace(1)* %in3
> +   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
> +   store double %r3, double addrspace(1)* %out
> +   ret void
> +}
> +
> +; FUNC-LABEL: @fma_v2f64
> +; SI: V_FMA_F64
> +; SI: V_FMA_F64
> +define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
> +                       <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
> +   %r0 = load <2 x double> addrspace(1)* %in1
> +   %r1 = load <2 x double> addrspace(1)* %in2
> +   %r2 = load <2 x double> addrspace(1)* %in3
> +   %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
> +   store <2 x double> %r3, <2 x double> addrspace(1)* %out
> +   ret void
> +}
> +
> +; FUNC-LABEL: @fma_v4f64
> +; SI: V_FMA_F64
> +; SI: V_FMA_F64
> +; SI: V_FMA_F64
> +; SI: V_FMA_F64
> +define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
> +                       <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
> +   %r0 = load <4 x double> addrspace(1)* %in1
> +   %r1 = load <4 x double> addrspace(1)* %in2
> +   %r2 = load <4 x double> addrspace(1)* %in3
> +   %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
> +   store <4 x double> %r3, <4 x double> addrspace(1)* %out
> +   ret void
> +}
> Index: test/CodeGen/R600/fma.ll
> ===================================================================
> --- test/CodeGen/R600/fma.ll
> +++ test/CodeGen/R600/fma.ll
> @@ -1,13 +1,10 @@
>  ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
>  
>  declare float @llvm.fma.f32(float, float, float) nounwind readnone
>  declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
>  declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
>  
> -declare double @llvm.fma.f64(double, double, double) nounwind readnone
> -declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
> -declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
> -
>  ; FUNC-LABEL: @fma_f32
>  ; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
>  define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
> @@ -47,43 +44,3 @@
>     store <4 x float> %r3, <4 x float> addrspace(1)* %out
>     ret void
>  }
> -
> -; FUNC-LABEL: @fma_f64
> -; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
> -define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
> -                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
> -   %r0 = load double addrspace(1)* %in1
> -   %r1 = load double addrspace(1)* %in2
> -   %r2 = load double addrspace(1)* %in3
> -   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
> -   store double %r3, double addrspace(1)* %out
> -   ret void
> -}
> -
> -; FUNC-LABEL: @fma_v2f64
> -; SI: V_FMA_F64
> -; SI: V_FMA_F64
> -define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
> -                       <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
> -   %r0 = load <2 x double> addrspace(1)* %in1
> -   %r1 = load <2 x double> addrspace(1)* %in2
> -   %r2 = load <2 x double> addrspace(1)* %in3
> -   %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
> -   store <2 x double> %r3, <2 x double> addrspace(1)* %out
> -   ret void
> -}
> -
> -; FUNC-LABEL: @fma_v4f64
> -; SI: V_FMA_F64
> -; SI: V_FMA_F64
> -; SI: V_FMA_F64
> -; SI: V_FMA_F64
> -define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
> -                       <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
> -   %r0 = load <4 x double> addrspace(1)* %in1
> -   %r1 = load <4 x double> addrspace(1)* %in2
> -   %r2 = load <4 x double> addrspace(1)* %in3
> -   %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
> -   store <4 x double> %r3, <4 x double> addrspace(1)* %out
> -   ret void
> -}

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list