[PATCH] R600/SI: Add patterns for v_cvt_{flr|rpi}_i32_f32

Tom Stellard tom at stellard.net
Thu Jan 15 15:51:51 PST 2015


On Thu, Jan 15, 2015 at 11:15:49PM +0000, Matt Arsenault wrote:
> http://reviews.llvm.org/D7011
> 

LGTM.

> Files:
>   lib/Target/R600/AMDGPUInstructions.td
>   lib/Target/R600/SIInstructions.td
>   test/CodeGen/R600/cvt_flr_i32_f32.ll
>   test/CodeGen/R600/cvt_rpi_i32_f32.ll
> 
> EMAIL PREFERENCES
>   http://reviews.llvm.org/settings/panel/emailpreferences/

> Index: lib/Target/R600/AMDGPUInstructions.td
> ===================================================================
> --- lib/Target/R600/AMDGPUInstructions.td
> +++ lib/Target/R600/AMDGPUInstructions.td
> @@ -438,6 +438,11 @@
>    [{return N->isExactlyValue(1.0);}]
>  >;
>  
> +def FP_HALF : PatLeaf <
> +  (fpimm),
> +  [{return N->isExactlyValue(0.5);}]
> +>;
> +
>  let isCodeGenOnly = 1, isPseudo = 1 in {
>  
>  let usesCustomInserter = 1  in {
> @@ -603,6 +608,18 @@
>  // 24-bit arithmetic patterns
>  def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
>  
> +// Special conversion patterns
> +
> +def cvt_rpi_i32_f32 : PatFrag <
> +  (ops node:$src),
> +  (fp_to_sint (ffloor (fadd $src, FP_HALF)))
> +>;
> +
> +def cvt_flr_i32_f32 : PatFrag <
> +  (ops node:$src),
> +  (fp_to_sint (ffloor $src))
> +>;
> +
>  /*
>  class UMUL24Pattern <Instruction UMUL24> : Pat <
>    (mul U24:$x, U24:$y),
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -1230,8 +1230,10 @@
>  defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
>    VOP_F32_I32, f16_to_fp
>  >;
> -//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "v_cvt_rpi_i32_f32", []>;
> -//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "v_cvt_flr_i32_f32", []>;
> +defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
> +  VOP_I32_F32, cvt_rpi_i32_f32>;
> +defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
> +  VOP_I32_F32, cvt_flr_i32_f32>;
>  //defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "v_cvt_off_f32_i4", []>;
>  defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
>    VOP_F32_F64, fround
> Index: test/CodeGen/R600/cvt_flr_i32_f32.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/cvt_flr_i32_f32.ll
> @@ -0,0 +1,79 @@
> +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +
> +declare float @llvm.fabs.f32(float) #1
> +declare float @llvm.floor.f32(float) #1
> +
> +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0:
> +; SI-NOT: add
> +; SI: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
> +; SI: s_endpgm
> +define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
> +  %floor = call float @llvm.floor.f32(float %x) #1
> +  %cvt = fptosi float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1:
> +; SI: v_add_f32_e64 [[TMP:v[0-9]+]], 1.0, s{{[0-9]+}}
> +; SI: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
> +; SI: s_endpgm
> +define void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 {
> +  %fadd = fadd float %x, 1.0
> +  %floor = call float @llvm.floor.f32(float %fadd) #1
> +  %cvt = fptosi float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs:
> +; SI-NOT: add
> +; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
> +; SI: s_endpgm
> +define void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
> +  %x.fabs = call float @llvm.fabs.f32(float %x) #1
> +  %floor = call float @llvm.floor.f32(float %x.fabs) #1
> +  %cvt = fptosi float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fneg:
> +; SI-NOT: add
> +; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
> +; SI: s_endpgm
> +define void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
> +  %x.fneg = fsub float -0.000000e+00, %x
> +  %floor = call float @llvm.floor.f32(float %x.fneg) #1
> +  %cvt = fptosi float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs_fneg:
> +; SI-NOT: add
> +; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
> +; SI: s_endpgm
> +define void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
> +  %x.fabs = call float @llvm.fabs.f32(float %x) #1
> +  %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
> +  %floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1
> +  %cvt = fptosi float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}no_cvt_flr_i32_f32_0:
> +; SI-NOT: v_cvt_flr_i32_f32
> +; SI: v_floor_f32
> +; SI: v_cvt_u32_f32_e32
> +; SI: s_endpgm
> +define void @no_cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
> +  %floor = call float @llvm.floor.f32(float %x) #1
> +  %cvt = fptoui float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +attributes #0 = { nounwind }
> +attributes #1 = { nounwind readnone }
> Index: test/CodeGen/R600/cvt_rpi_i32_f32.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/cvt_rpi_i32_f32.ll
> @@ -0,0 +1,76 @@
> +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +
> +declare float @llvm.fabs.f32(float) #1
> +declare float @llvm.floor.f32(float) #1
> +
> +; FUNC-LABEL: {{^}}cvt_rpi_i32_f32:
> +; SI: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
> +; SI: s_endpgm
> +define void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 {
> +  %fadd = fadd float %x, 0.5
> +  %floor = call float @llvm.floor.f32(float %fadd) #1
> +  %cvt = fptosi float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs:
> +; SI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
> +; SI: s_endpgm
> +define void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
> +  %x.fabs = call float @llvm.fabs.f32(float %x) #1
> +  %fadd = fadd float %x.fabs, 0.5
> +  %floor = call float @llvm.floor.f32(float %fadd) #1
> +  %cvt = fptosi float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FIXME: This doesn't work because the fneg is folded into the fadd, forming fsub 0.5, x
> +; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fneg:
> +; XSI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
> +; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, s{{[0-9]+}}
> +; SI: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
> +; SI: s_endpgm
> +define void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
> +  %x.fneg = fsub float -0.000000e+00, %x
> +  %fadd = fadd float %x.fneg, 0.5
> +  %floor = call float @llvm.floor.f32(float %fadd) #1
> +  %cvt = fptosi float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FIXME: This doesn't work for the same reason as cvt_rpi_i32_f32_fneg: the fneg is folded into an fsub
> +; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs_fneg:
> +; XSI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
> +
> +; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, |s{{[0-9]+}}|
> +; SI: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
> +; SI: s_endpgm
> +define void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
> +  %x.fabs = call float @llvm.fabs.f32(float %x) #1
> +  %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
> +  %fadd = fadd float %x.fabs.fneg, 0.5
> +  %floor = call float @llvm.floor.f32(float %fadd) #1
> +  %cvt = fptosi float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}no_cvt_rpi_i32_f32_0:
> +; SI-NOT: v_cvt_rpi_i32_f32
> +; SI: v_add_f32
> +; SI: v_floor_f32
> +; SI: v_cvt_u32_f32
> +; SI: s_endpgm
> +define void @no_cvt_rpi_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
> +  %fadd = fadd float %x, 0.5
> +  %floor = call float @llvm.floor.f32(float %fadd) #1
> +  %cvt = fptoui float %floor to i32
> +  store i32 %cvt, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +attributes #0 = { nounwind }
> +attributes #1 = { nounwind readnone }

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list