[PATCH v3] R600/SI: Add pattern for AMDGPUurecip

Tom Stellard tom at stellard.net
Fri Apr 19 21:06:28 PDT 2013


On Thu, Apr 11, 2013 at 10:12:01AM +0200, Christian König wrote:
> Am 10.04.2013 18:50, schrieb Tom Stellard:
> >On Wed, Apr 10, 2013 at 05:59:48PM +0200, Michel Dänzer wrote:
> >>[SNIP]
> >We should start using the updated pattern syntax for all new patterns.
> >This means replacing register classes with types for the input patterns
> >and omitting the type in the output pattern:
> >
> >def : Pat <
> >   (AMDGPUurecip i32:$src0),
> >   (V_CVT_U32_F32_e32
> >     (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
> >                    (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
> >>;
> >
> >With that change:
> >
> >Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> 
> BTW: I created the attached patches two weeks ago. They rework most
> of the existing patterns on SI to use the new format, but I
> currently don't have time to rebase, test & commit them. They
> shouldn't change anything in functionality, so if you guys think
> they are ok then please review and commit them.
> 

Thanks for doing this.  I've thrown these patches into a branch along
with changes to the R600 patterns.  I will try to test them next week.
Is there any reason why we can't squash all these patches together before
we commit?

-Tom


> Thanks,
> Christian.

> From f0175c616db5f6d3f1024137edbd8773c118f7dc Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 12:50:55 +0100
> Subject: [PATCH 1/9] R600/SI: remove nonsense select pattern
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Fortunately this pattern never matched, otherwise
> we would have generated incorrect code.
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  lib/Target/R600/SIInstructions.td |    9 +--------
>  1 file changed, 1 insertion(+), 8 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index eb410d7..e37003e 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1019,18 +1019,11 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
>  def S_CSELECT_B32 : SOP2 <
>    0x0000000a, (outs SReg_32:$dst),
>    (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
> -  [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
> -                                     SReg_32:$src0, SReg_32:$src1))]
> +  []
>  >;
>  
>  def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
>  
> -// f32 pattern for S_CSELECT_B32
> -def : Pat <
> -  (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
> -  (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
> ->;
> -
>  def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
>  
>  def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
> -- 
> 1.7.10.4
> 

> From 7a2c0f084fa9ac949084a2c719d9944dd680a866 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:18:00 +0100
> Subject: [PATCH 2/9] R600/SI: start reworking patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> We don't need register classes in patterns any longer.
> Let's start with the indirect addressing patterns.
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  lib/Target/R600/SIInstructions.td |   36 ++++++++++++++----------------------
>  1 file changed, 14 insertions(+), 22 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index e37003e..6ee3923 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1542,45 +1542,37 @@ defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
>  /**********   Indirect adressing   **********/
>  /********** ====================== **********/
>  
> -multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
> -                                SI_INDIRECT_DST IndDst> {
> +multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
> +
>    // 1. Extract with offset
>    def : Pat<
> -    (vector_extract (vt rc:$vec),
> -      (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
> -    ),
> -    (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
> +    (vector_extract vt:$vec, (i64 (zext (add i32:$idx, imm:$off)))),
> +    (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), vt:$vec, VReg_32:$idx, imm:$off))
>    >;
>  
>    // 2. Extract without offset
>    def : Pat<
> -    (vector_extract (vt rc:$vec),
> -      (i64 (zext (i32 VReg_32:$idx)))
> -    ),
> -    (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
> +    (vector_extract vt:$vec, (i64 (zext i32:$idx))),
> +    (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), vt:$vec, i32:$idx, 0))
>    >;
>  
>    // 3. Insert with offset
>    def : Pat<
> -    (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
> -      (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
> -    ),
> -    (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
> +    (vector_insert vt:$vec, f32:$val, (i64 (zext (add i32:$idx, imm:$off)))),
> +    (IndDst (IMPLICIT_DEF), vt:$vec, i32:$idx, imm:$off, f32:$val)
>    >;
>  
>    // 4. Insert without offset
>    def : Pat<
> -    (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
> -      (i64 (zext (i32 VReg_32:$idx)))
> -    ),
> -    (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
> +    (vector_insert vt:$vec, f32:$val, (i64 (zext i32:$idx))),
> +    (IndDst (IMPLICIT_DEF), vt:$vec, i32:$idx, 0, f32:$val)
>    >;
>  }
>  
> -defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
> -defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
> -defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
> -defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
> +defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>;
> +defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>;
> +defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>;
> +defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>;
>  
>  /********** =============== **********/
>  /**********   Conditions    **********/
> -- 
> 1.7.10.4
> 

> From 3c102c001b7e707cadc1261caabf5ef0a01e0434 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:22:53 +0100
> Subject: [PATCH 3/9] R600/SI: remove reg classes from constant load patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  lib/Target/R600/SIInstructions.td |   13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 6ee3923..48e1698 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1514,22 +1514,23 @@ def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
>  /********** ================== **********/
>  
>  multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
> +
>    // 1. Offset as 8bit DWORD immediate
>    def : Pat <
> -    (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)),
> -    (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset))
> +    (constant_load (SIadd64bit32bit i64:$sbase, IMM8bitDWORD:$offset)),
> +    (vt (Instr_IMM i64:$sbase, IMM8bitDWORD:$offset))
>    >;
>  
>    // 2. Offset loaded in an 32bit SGPR
>    def : Pat <
> -    (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
> -    (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
> +    (constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)),
> +    (vt (Instr_SGPR i64:$sbase, (S_MOV_B32 imm:$offset)))
>    >;
>  
>    // 3. No offset at all
>    def : Pat <
> -    (constant_load SReg_64:$sbase),
> -    (vt (Instr_IMM SReg_64:$sbase, 0))
> +    (constant_load i64:$sbase),
> +    (vt (Instr_IMM i64:$sbase, 0))
>    >;
>  }
>  
> -- 
> 1.7.10.4
> 

> From c4aad71aa012d62b5a6760f7660b20446b9a184e Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:26:31 +0100
> Subject: [PATCH 4/9] R600/SI: remove reg classes from VOP3 patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  lib/Target/R600/SIInstructions.td |    7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 48e1698..a33ee69 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1505,9 +1505,10 @@ def : Pat <
>  /**********   VOP3 Patterns    **********/
>  /********** ================== **********/
>  
> -def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
> -           (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
> -            0, 0, 0, 0)>;
> +def : Pat <
> +  (f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)),
> +  (V_MAD_F32 f32:$src0, f32:$src1, f32:$src2, 0, 0, 0, 0)
> +>;
>  
>  /********** ================== **********/
>  /**********   SMRD Patterns    **********/
> -- 
> 1.7.10.4
> 

> From fd8fe9d538267aa578ecea740c9eb53f78e3b923 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:33:38 +0100
> Subject: [PATCH 5/9] R600/SI: remove reg classes from intrinsic patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  lib/Target/R600/SIInstructions.td |   58 ++++++++++++++++++-------------------
>  1 file changed, 29 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index a33ee69..3e3974e 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1438,67 +1438,67 @@ def : Pat <
>  def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
>  
>  def : Pat <
> -  (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
> -  (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1))
> +  (int_AMDGPU_div f32:$src0, f32:$src1),
> +  (V_MUL_LEGACY_F32_e32 f32:$src0, (V_RCP_LEGACY_F32_e32 f32:$src1))
>  >;
>  
>  def : Pat<
> -  (fdiv VSrc_32:$src0, VSrc_32:$src1),
> -  (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1))
> +  (fdiv f32:$src0, f32:$src1),
> +  (V_MUL_F32_e32 f32:$src0, (V_RCP_F32_e32 f32:$src1))
>  >;
>  
>  def : Pat <
> -  (fcos VSrc_32:$src0),
> -  (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
> +  (fcos f32:$src0),
> +  (V_COS_F32_e32 (V_MUL_F32_e32 f32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
>  >;
>  
>  def : Pat <
> -  (fsin VSrc_32:$src0),
> -  (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
> +  (fsin f32:$src0),
> +  (V_SIN_F32_e32 (V_MUL_F32_e32 f32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
>  >;
>  
>  def : Pat <
> -  (int_AMDGPU_cube VReg_128:$src),
> +  (int_AMDGPU_cube v4f32:$src),
>    (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
> -    (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
> -                  (EXTRACT_SUBREG VReg_128:$src, sub1),
> -                  (EXTRACT_SUBREG VReg_128:$src, sub2),
> +    (V_CUBETC_F32 (EXTRACT_SUBREG v4f32:$src, sub0),
> +                  (EXTRACT_SUBREG v4f32:$src, sub1),
> +                  (EXTRACT_SUBREG v4f32:$src, sub2),
>                    0, 0, 0, 0), sub0),
> -    (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
> -                  (EXTRACT_SUBREG VReg_128:$src, sub1),
> -                  (EXTRACT_SUBREG VReg_128:$src, sub2),
> +    (V_CUBESC_F32 (EXTRACT_SUBREG v4f32:$src, sub0),
> +                  (EXTRACT_SUBREG v4f32:$src, sub1),
> +                  (EXTRACT_SUBREG v4f32:$src, sub2),
>                    0, 0, 0, 0), sub1),
> -    (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
> -                  (EXTRACT_SUBREG VReg_128:$src, sub1),
> -                  (EXTRACT_SUBREG VReg_128:$src, sub2),
> +    (V_CUBEMA_F32 (EXTRACT_SUBREG v4f32:$src, sub0),
> +                  (EXTRACT_SUBREG v4f32:$src, sub1),
> +                  (EXTRACT_SUBREG v4f32:$src, sub2),
>                    0, 0, 0, 0), sub2),
> -    (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
> -                  (EXTRACT_SUBREG VReg_128:$src, sub1),
> -                  (EXTRACT_SUBREG VReg_128:$src, sub2),
> +    (V_CUBEID_F32 (EXTRACT_SUBREG v4f32:$src, sub0),
> +                  (EXTRACT_SUBREG v4f32:$src, sub1),
> +                  (EXTRACT_SUBREG v4f32:$src, sub2),
>                    0, 0, 0, 0), sub3)
>  >;
>  
>  def : Pat <
> -  (i32 (sext (i1 SReg_64:$src0))),
> -  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
> +  (i32 (sext i1:$src0)),
> +  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), i1:$src0)
>  >;
>  
>  // 1. Offset as 8bit DWORD immediate
>  def : Pat <
> -  (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
> -  (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
> +  (int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset),
> +  (S_BUFFER_LOAD_DWORD_IMM v16i8:$sbase, IMM8bitDWORD:$offset)
>  >;
>  
>  // 2. Offset loaded in an 32bit SGPR
>  def : Pat <
> -  (int_SI_load_const SReg_128:$sbase, imm:$offset),
> -  (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
> +  (int_SI_load_const v16i8:$sbase, imm:$offset),
> +  (S_BUFFER_LOAD_DWORD_SGPR v16i8:$sbase, (S_MOV_B32 imm:$offset))
>  >;
>  
>  // 3. Offset in an 32Bit VGPR
>  def : Pat <
> -  (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
> -  (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
> +  (int_SI_load_const v16i8:$sbase, i32:$voff),
> +  (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, i32:$voff, v16i8:$sbase, 0, 0, 0)
>  >;
>  
>  /********** ================== **********/
> -- 
> 1.7.10.4
> 

> From e009091046c06e55dba8381ccb1bc3fac27c726b Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:37:24 +0100
> Subject: [PATCH 6/9] R600/SI: remove reg classes from interpolation patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  lib/Target/R600/SIInstructions.td |   14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 3e3974e..8db15c5 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1418,16 +1418,16 @@ def : Pat <
>  /********** ===================== **********/
>  
>  def : Pat <
> -  (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
> -  (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
> +  (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
> +  (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, i32:$params)
>  >;
>  
>  def : Pat <
> -  (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
> -  (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
> -                                    imm:$attr_chan, imm:$attr, M0Reg:$params),
> -                   (EXTRACT_SUBREG VReg_64:$ij, sub1),
> -                   imm:$attr_chan, imm:$attr, M0Reg:$params)
> +  (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
> +  (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
> +                                    imm:$attr_chan, imm:$attr, i32:$params),
> +                   (EXTRACT_SUBREG v2i32:$ij, sub1),
> +                   imm:$attr_chan, imm:$attr, i32:$params)
>  >;
>  
>  /********** ================== **********/
> -- 
> 1.7.10.4
> 

> From 7ee8131bbb98e6e13be8ae8e0df248550cb72ded Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 11:40:14 +0100
> Subject: [PATCH 7/9] R600/SI: remove register classes from modifier patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  lib/Target/R600/SIInstructions.td |   12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 8db15c5..0988653 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1364,20 +1364,20 @@ def : BitConvert <f32, i32, VReg_32>;
>  /********** =================== **********/
>  
>  def : Pat <
> -  (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
> -  (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
> +  (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
> +  (V_ADD_F32_e64 f32:$src, (i32 0 /* SRC1 */),
>     0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
>  >;
>  
>  def : Pat <
> -  (fabs VReg_32:$src),
> -  (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
> +  (fabs f32:$src),
> +  (V_ADD_F32_e64 f32:$src, (i32 0 /* SRC1 */),
>     1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
>  >;
>  
>  def : Pat <
> -  (fneg VReg_32:$src),
> -  (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
> +  (fneg f32:$src),
> +  (V_ADD_F32_e64 f32:$src, (i32 0 /* SRC1 */),
>     0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
>  >;
>  
> -- 
> 1.7.10.4
> 

> From 11c3926424284acb50587b9038f7cb05afe53849 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 12:29:45 +0100
> Subject: [PATCH 8/9] R600/SI: remove register classes from image sampling
>  patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  lib/Target/R600/SIInstructions.td |   92 +++++++++++++++++--------------------
>  1 file changed, 41 insertions(+), 51 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 0988653..4f58081 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1203,77 +1203,67 @@ def : Pat <
>         VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
>  >;
>  
> +/********** ======================= **********/
> +/********** Image sampling patterns **********/
> +/********** ======================= **********/
>  
>  /* int_SI_sample for simple 1D texture lookup */
>  def : Pat <
> -  (int_SI_sample VReg_32:$addr, SReg_256:$rsrc, SReg_128:$sampler, imm),
> -  (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
> -                SReg_256:$rsrc, SReg_128:$sampler)
> +  (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
> +  (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, v1i32:$addr,
> +                v32i8:$rsrc, v16i8:$sampler)
>  >;
>  
> -class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
> -                    ValueType addr_type> : Pat <
> -    (name (addr_type addr_class:$addr),
> -          SReg_256:$rsrc, SReg_128:$sampler, imm),
> -    (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
> -          SReg_256:$rsrc, SReg_128:$sampler)
> +class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
> +    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
> +    (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
>  >;
>  
> -class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
> -                        ValueType addr_type> : Pat <
> -    (name (addr_type addr_class:$addr),
> -          SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
> -    (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
> -          SReg_256:$rsrc, SReg_128:$sampler)
> +class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
> +    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT),
> +    (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
>  >;
>  
> -class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
> -                         ValueType addr_type> : Pat <
> -    (name (addr_type addr_class:$addr),
> -          SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
> -    (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
> -          SReg_256:$rsrc, SReg_128:$sampler)
> +class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
> +    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY),
> +    (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
>  >;
>  
>  class SampleShadowPattern<Intrinsic name, MIMG opcode,
> -                          RegisterClass addr_class, ValueType addr_type> : Pat <
> -    (name (addr_type addr_class:$addr),
> -          SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
> -    (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
> -          SReg_256:$rsrc, SReg_128:$sampler)
> +                          ValueType vt> : Pat <
> +    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW),
> +    (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
>  >;
>  
>  class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
> -                               RegisterClass addr_class, ValueType addr_type> : Pat <
> -    (name (addr_type addr_class:$addr),
> -          SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
> -    (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
> -          SReg_256:$rsrc, SReg_128:$sampler)
> +                               ValueType vt> : Pat <
> +    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY),
> +    (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, vt:$addr, v32i8:$rsrc, v16i8:$sampler)
>  >;
>  
>  /* int_SI_sample* for texture lookups consuming more address parameters */
> -multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> {
> -  def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
> -  def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
> -  def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
> -  def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
> -  def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
> -
> -  def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
> -  def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
> -  def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
> -  def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
> -
> -  def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
> -  def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
> -  def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
> -  def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
> +multiclass SamplePatterns<ValueType addr_type> {
> +  def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
> +  def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
> +  def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
> +  def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
> +  def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
> +
> +  def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
> +  def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
> +  def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
> +  def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
> +
> +  def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
> +  def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
> +  def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
> +  def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
>  }
>  
> -defm : SamplePatterns<VReg_64, v2i32>;
> -defm : SamplePatterns<VReg_128, v4i32>;
> -defm : SamplePatterns<VReg_256, v8i32>;
> -defm : SamplePatterns<VReg_512, v16i32>;
> +defm : SamplePatterns<v2i32>;
> +defm : SamplePatterns<v4i32>;
> +defm : SamplePatterns<v8i32>;
> +defm : SamplePatterns<v16i32>;
>  
>  /********** ============================================ **********/
>  /********** Extraction, Insertion, Building and Casting  **********/
> -- 
> 1.7.10.4
> 

> From b39b969f73f1766e3fd4615364f0015589012a52 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Mar 2013 13:01:28 +0100
> Subject: [PATCH 9/9] R600/SI: remove register classes from the remaining
>  patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  lib/Target/R600/SIInstructions.td |   39 ++++++++++++++++++-------------------
>  1 file changed, 19 insertions(+), 20 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 4f58081..b81a3bd 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -793,8 +793,8 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
>  
>  //f32 pattern for V_CNDMASK_B32_e64
>  def : Pat <
> -  (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)),
> -  (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2)
> +  (f32 (select i1:$src2, f32:$src1, f32:$src0)),
> +  (V_CNDMASK_B32_e64 f32:$src0, f32:$src1, i1:$src2)
>  >;
>  
>  defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
> @@ -983,18 +983,18 @@ def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
>  } // isCommutable = 1
>  
>  def : Pat <
> -  (mul VSrc_32:$src0, VReg_32:$src1),
> -  (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
> +  (mul i32:$src0, i32:$src1),
> +  (V_MUL_LO_I32 i32:$src0, i32:$src1, (i32 0), 0, 0, 0, 0)
>  >;
>  
>  def : Pat <
> -  (mulhu VSrc_32:$src0, VReg_32:$src1),
> -  (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
> +  (mulhu i32:$src0, i32:$src1),
> +  (V_MUL_HI_U32 i32:$src0, i32:$src1, (i32 0), 0, 0, 0, 0)
>  >;
>  
>  def : Pat <
> -  (mulhs VSrc_32:$src0, VReg_32:$src1),
> -  (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
> +  (mulhs i32:$src0, i32:$src1),
> +  (V_MUL_HI_I32 i32:$src0, i32:$src1, (i32 0), 0, 0, 0, 0)
>  >;
>  
>  def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
> @@ -1031,15 +1031,15 @@ def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
>  >;
>  
>  def : Pat <
> -  (i1 (and SSrc_64:$src0, SSrc_64:$src1)),
> -  (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
> +  (i1 (and i1:$src0, i1:$src1)),
> +  (S_AND_B64 i1:$src0, i1:$src1)
>  >;
>  
>  def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
>  def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
>  def : Pat <
> -  (i1 (or SSrc_64:$src0, SSrc_64:$src1)),
> -  (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1)
> +  (i1 (or i1:$src0, i1:$src1)),
> +  (S_OR_B64 i1:$src0, i1:$src1)
>  >;
>  def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
>  def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
> @@ -1177,8 +1177,8 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
>  } // end IsCodeGenOnly, isPseudo
>  
>  def : Pat<
> -  (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
> -  (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0))
> +  (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
> +  (V_CNDMASK_B32_e64 f32:$src2, f32:$src1, (V_CMP_GT_F32_e64 0, f32:$src0))
>  >;
>  
>  def : Pat <
> @@ -1188,19 +1188,18 @@ def : Pat <
>  
>  /* int_SI_vs_load_input */
>  def : Pat<
> -  (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
> -                        VReg_32:$buf_idx_vgpr),
> +  (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset,
> +                        i32:$buf_idx_vgpr),
>    (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
> -                           VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
> -                           0, 0, 0)
> +                           i32:$buf_idx_vgpr, v16i8:$tlst, 0, 0, 0)
>  >;
>  
>  /* int_SI_export */
>  def : Pat <
>    (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
> -                 VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
> +                 f32:$src0, f32:$src1, f32:$src2, f32:$src3),
>    (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
> -       VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
> +       f32:$src0, f32:$src1, f32:$src2, f32:$src3)
>  >;
>  
>  /********** ======================= **********/
> -- 
> 1.7.10.4
> 





More information about the llvm-commits mailing list