[PATCH v3] R600/SI: Add pattern for AMDGPUurecip

Wed Apr 10 09:50:37 PDT 2013

On Wed, Apr 10, 2013 at 05:59:48PM +0200, Michel Dänzer wrote:
> From: Michel Dänzer <michel.daenzer at amd.com>
> 
> 21 more little piglits with radeonsi.
> 
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> 
> v3: Use a named constant for the scaling multiplications and add comments about them
> 
>  lib/Target/R600/AMDGPUInstructions.td |  1 +
>  lib/Target/R600/R600Instructions.td   |  3 ++-
>  lib/Target/R600/SIInstructions.td     | 12 ++++++++++--
>  test/CodeGen/R600/urecip.ll           | 12 ++++++++++++
>  4 files changed, 25 insertions(+), 3 deletions(-)
>  create mode 100644 test/CodeGen/R600/urecip.ll
> 
> diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> index e740348..fa890c1 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -94,6 +94,7 @@ class Constants {
>  int TWO_PI = 0x40c90fdb;
>  int PI = 0x40490fdb;
>  int TWO_PI_INV = 0x3e22f983;
> +int FP_UINT_MAX_PLUS_1 = 0x4f800000;	// 1 << 32 in floating point encoding
>  }
>  def CONST : Constants;
>  
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index b4c45e1..8ede6cc 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -1923,10 +1923,11 @@ def : COS_PAT <COS_cm>;
>  defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
>  
>  // RECIP_UINT emulation for Cayman
> +// The multiplication scales from [0,1] to the unsigned integer range
>  def : Pat <
>    (AMDGPUurecip R600_Reg32:$src0),
>    (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
> -                            (MOV_IMM_I32 0x4f800000)))
> +                            (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>  >;
>  
>  
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index e2a08fc..0226d5a 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -602,8 +602,8 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
>  defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
>    [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
>  >;
> -//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
> -//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
> +defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
> +defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
>  defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
>    [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
>  >;
> @@ -1514,6 +1514,14 @@ def : Pat <
>    (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
>  >;
>  
> +// The multiplication scales from [0,1] to the unsigned integer range
> +def : Pat <
> +  (AMDGPUurecip VSrc_32:$src0),
> +  (V_CVT_U32_F32_e32
> +    (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
> +                   (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 VSrc_32:$src0))))
> +>;
> +

We should start using the updated pattern syntax for all new patterns.
This means replacing register classes with value types (e.g. i32) on the
operands of the input pattern, and omitting the register class from the
operands of the output pattern:

def : Pat <
  (AMDGPUurecip i32:$src0),
  (V_CVT_U32_F32_e32
    (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
                   (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;

With that change:

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
>  /********** ================== **********/
>  /**********   VOP3 Patterns    **********/
>  /********** ================== **********/
> diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll
> new file mode 100644
> index 0000000..dad02dd
> --- /dev/null
> +++ b/test/CodeGen/R600/urecip.ll
> @@ -0,0 +1,12 @@
> +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
> +
> +;CHECK: V_RCP_IFLAG_F32_e32
> +
> +define void @test(i32 %p, i32 %q) {
> +   %i = udiv i32 %p, %q
> +   %r = bitcast i32 %i to float
> +   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
> +   ret void
> +}
> +
> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> -- 
> 1.8.2
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits