[PATCH RESEND v2 1/1] R600: Expand vector sin and cos.

Fri May 2 08:54:50 PDT 2014

On Thu, May 01, 2014 at 01:13:12PM -0400, Jan Vesely wrote:
> v2: move code to AMDGPUISelLowering.cpp
>     squash with tests (both EG and SI)
> 
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
> 
> Hi Tom,
> 
> this version should address both of your concerns. It is the same patch as the
> one attached to my email of 4/21. I'm resending it as a patch mail, just in
> case it got lost.
> 

Committed as r207845.

You should apply for commit access, so you can commit patches on your own
after they have been reviewed.  This will help preserve the authorship
information of your patches and also reduce the time you have to wait
for them to be committed:

http://llvm.org/docs/DeveloperPolicy.html#obtaining-commit-access

-Tom

> regards,
> Jan
> 
> 
>  lib/Target/R600/AMDGPUISelLowering.cpp |  2 ++
>  test/CodeGen/R600/llvm.cos.ll          | 43 ++++++++++++++++++++++++---------
>  test/CodeGen/R600/llvm.sin.ll          | 44 +++++++++++++++++++++++++---------
>  3 files changed, 67 insertions(+), 22 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index 52a500c..9dc6611 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -244,6 +244,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
>      MVT::SimpleValueType VT = FloatTypes[x];
>      setOperationAction(ISD::FABS, VT, Expand);
>      setOperationAction(ISD::FADD, VT, Expand);
> +    setOperationAction(ISD::FCOS, VT, Expand);
>      setOperationAction(ISD::FDIV, VT, Expand);
>      setOperationAction(ISD::FPOW, VT, Expand);
>      setOperationAction(ISD::FFLOOR, VT, Expand);
> @@ -251,6 +252,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
>      setOperationAction(ISD::FMUL, VT, Expand);
>      setOperationAction(ISD::FRINT, VT, Expand);
>      setOperationAction(ISD::FSQRT, VT, Expand);
> +    setOperationAction(ISD::FSIN, VT, Expand);
>      setOperationAction(ISD::FSUB, VT, Expand);
>      setOperationAction(ISD::SELECT, VT, Expand);
>    }
> diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
> index aaf2305..9e7a4de 100644
> --- a/test/CodeGen/R600/llvm.cos.ll
> +++ b/test/CodeGen/R600/llvm.cos.ll
> @@ -1,19 +1,40 @@
> -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
> +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
>  
> -;CHECK: MULADD_IEEE *
> -;CHECK: FRACT *
> -;CHECK: ADD *
> -;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;FUNC-LABEL: test
> +;EG: MULADD_IEEE *
> +;EG: FRACT *
> +;EG: ADD *
> +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG-NOT: COS
> +;SI: V_COS_F32
> +;SI-NOT: V_COS_F32
>  
> -define void @test(<4 x float> inreg %reg0) #0 {
> -   %r0 = extractelement <4 x float> %reg0, i32 0
> -   %r1 = call float @llvm.cos.f32(float %r0)
> -   %vec = insertelement <4 x float> undef, float %r1, i32 0
> -   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
> +define void @test(float addrspace(1)* %out, float %x) #1 {
> +   %cos = call float @llvm.cos.f32(float %x)
> +   store float %cos, float addrspace(1)* %out
> +   ret void
> +}
> +
> +;FUNC-LABEL: testv
> +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG-NOT: COS
> +;SI: V_COS_F32
> +;SI: V_COS_F32
> +;SI: V_COS_F32
> +;SI: V_COS_F32
> +;SI-NOT: V_COS_F32
> +
> +define void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 {
> +   %cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx)
> +   store <4 x float> %cos, <4 x float> addrspace(1)* %out
>     ret void
>  }
>  
>  declare float @llvm.cos.f32(float) readnone
> -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
> +declare <4 x float> @llvm.cos.v4f32(<4 x float>) readnone
>  
>  attributes #0 = { "ShaderType"="0" }
> diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
> index 9eb9983..41c363c 100644
> --- a/test/CodeGen/R600/llvm.sin.ll
> +++ b/test/CodeGen/R600/llvm.sin.ll
> @@ -1,19 +1,41 @@
> -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
> +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
>  
> -;CHECK: MULADD_IEEE *
> -;CHECK: FRACT *
> -;CHECK: ADD *
> -;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;FUNC-LABEL: test
> +;EG: MULADD_IEEE *
> +;EG: FRACT *
> +;EG: ADD *
> +;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG-NOT: SIN
> +;SI: V_MUL_F32
> +;SI: V_SIN_F32
> +;SI-NOT: V_SIN_F32
>  
> -define void @test(<4 x float> inreg %reg0) #0 {
> -   %r0 = extractelement <4 x float> %reg0, i32 0
> -   %r1 = call float @llvm.sin.f32( float %r0)
> -   %vec = insertelement <4 x float> undef, float %r1, i32 0
> -   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
> +define void @test(float addrspace(1)* %out, float %x) #1 {
> +   %sin = call float @llvm.sin.f32(float %x)
> +   store float %sin, float addrspace(1)* %out
> +   ret void
> +}
> +
> +;FUNC-LABEL: testv
> +;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +;EG-NOT: SIN
> +;SI: V_SIN_F32
> +;SI: V_SIN_F32
> +;SI: V_SIN_F32
> +;SI: V_SIN_F32
> +;SI-NOT: V_SIN_F32
> +
> +define void @testv(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
> +   %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
> +   store <4 x float> %sin, <4 x float> addrspace(1)* %out
>     ret void
>  }
>  
>  declare float @llvm.sin.f32(float) readnone
> -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
> +declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone
>  
>  attributes #0 = { "ShaderType"="0" }
> -- 
> 1.9.0
>