[PATCHES] R600/SI: VI fixes
Tom Stellard
tom at stellard.net
Mon Jan 12 13:32:40 PST 2015
On Mon, Dec 15, 2014 at 01:53:19PM +0100, Marek Olšák wrote:
> An updated version is attached. I fixed the shrinking pass for opcodes
> that don't have e32 encoding (patch 3). I'm not sure if patch 2 is a
> good idea, I'd appreciate some opinion on that.
>
> The MIN3/MAX3 patch is not included here. Still don't know what aspect
> of those opcodes should be tested. Also, I can't re-use the current
> tests, because the store opcode that the tests use isn't implemented
> for VI.
>
> Marek
>
> On Sun, Dec 14, 2014 at 11:02 PM, Marek Olšák <maraeo at gmail.com> wrote:
> > The current pseudos define everything except for the encoding.
> >
> > The 5 attached patches replace patch 1 in the previous series.
> > This new series unifies other VOP2 opcodes which are only available as
> > VOP3 on VI.
> >
> > Marek
> >
> > On Sat, Dec 13, 2014 at 7:20 PM, Matt Arsenault <arsenm2 at gmail.com> wrote:
> >>
> >>> On Dec 13, 2014, at 5:12 AM, Marek Olšák <maraeo at gmail.com> wrote:
> >>>
> >>> On Fri, Dec 12, 2014 at 11:57 PM, Matt Arsenault <arsenm2 at gmail.com> wrote:
> >>>>
> >>>>> On Dec 12, 2014, at 4:41 PM, Marek Olšák <maraeo at gmail.com> wrote:
> >>>>>
> >>>>> Please review.
> >>>>>
> >>>>> Marek
> >>>>
> >>>> #1 - Why can’t this use the same encoding mapping trick the other instructions do so everything except encoding doesn’t need to see separate opcodes?
> >>>
> >>> The instructions are VOP2 on SI and VOP3 on VI. I can't define common
> >>> pseudo instructions, because it's not clear whether I should set the
> >>> VOP2 bit or the VOP3 bit or both or none.
> >>>
> >>> Marek
> >>
> >> I would expect pseudos to have neither set, but I’m not sure what the current pseudos do
> From fe119184222cc6e48fc6f1726d140dd13014e009 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Sun, 14 Dec 2014 15:11:23 +0100
> Subject: [PATCH 1/6] R600/SI: Add common class VOPAnyCommon
>
LGTM.
> From 57e9b82fd65ae1b3b537932b3ac961163ecd2bd9 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Sun, 14 Dec 2014 16:08:20 +0100
> Subject: [PATCH 2/6] R600/SI: Remove multiclass VOP1InstSI
>
> VOP1Inst also defines a VI opcode, but the predicate should prevent it from
> getting selected (right?), so the behavior should be the same.
Yes.
LGTM.
> From e9a3b4bf87f6e9173dae0dc01beccd0278d59393 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Mon, 15 Dec 2014 13:09:36 +0100
> Subject: [PATCH 3/6] R600/SI: Don't shrink instructions whose e32 encoding
> doesn't exist
>
> ---
> lib/Target/R600/AMDGPUMCInstLower.cpp | 16 +++++++---------
> lib/Target/R600/AMDGPUMCInstLower.h | 7 +++----
> lib/Target/R600/SIShrinkInstructions.cpp | 6 ++++++
> 3 files changed, 16 insertions(+), 13 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
> index 2dcebf5..554c534 100644
> --- a/lib/Target/R600/AMDGPUMCInstLower.cpp
> +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
> @@ -40,7 +40,7 @@ AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
> { }
>
> enum AMDGPUMCInstLower::SISubtarget
> -AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned Gen) const {
> +AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned Gen) {
Why was 'const' removed ?
> switch (Gen) {
> default:
> return AMDGPUMCInstLower::SI;
> @@ -49,19 +49,17 @@ AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned Gen) const {
> }
> }
>
> -unsigned AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode) const {
> +int AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode, unsigned Gen) {
Same question here.
>
> - int MCOpcode = AMDGPU::getMCOpcode(MIOpcode,
> - AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
> - if (MCOpcode == -1)
> - MCOpcode = MIOpcode;
> -
> - return MCOpcode;
> + return AMDGPU::getMCOpcode(MIOpcode, AMDGPUSubtargetToSISubtarget(Gen));
> }
>
> void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
>
> - OutMI.setOpcode(getMCOpcode(MI->getOpcode()));
> + int MIOpcode = MI->getOpcode();
> + int MCOpcode = getMCOpcode(MIOpcode, ST.getGeneration());
> +
> + OutMI.setOpcode(MCOpcode != -1 ? MCOpcode : MIOpcode);
>
> for (const MachineOperand &MO : MI->explicit_operands()) {
> MCOperand MCOp;
> diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h
> index 0ae4d11..56dc3e3 100644
> --- a/lib/Target/R600/AMDGPUMCInstLower.h
> +++ b/lib/Target/R600/AMDGPUMCInstLower.h
> @@ -31,10 +31,7 @@ class AMDGPUMCInstLower {
>
> /// Convert a member of the AMDGPUSubtarget::Generation enum to the
> /// SISubtarget enum.
> - enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) const;
> -
> - /// Get the MC opcode for this MachineInstr.
> - unsigned getMCOpcode(unsigned MIOpcode) const;
> + static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen);
>
> public:
> AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST);
> @@ -42,6 +39,8 @@ public:
> /// \brief Lower a MachineInstr to an MCInst
> void lower(const MachineInstr *MI, MCInst &OutMI) const;
>
> + /// Get the MC opcode for this MachineInstr.
> + static int getMCOpcode(unsigned MIOpcode, unsigned Gen);
> };
>
> } // End namespace llvm
> diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
> index e97f934..20632d2 100644
> --- a/lib/Target/R600/SIShrinkInstructions.cpp
> +++ b/lib/Target/R600/SIShrinkInstructions.cpp
> @@ -10,6 +10,7 @@
> //
>
> #include "AMDGPU.h"
> +#include "AMDGPUMCInstLower.h"
> #include "AMDGPUSubtarget.h"
> #include "SIInstrInfo.h"
> #include "llvm/ADT/Statistic.h"
> @@ -220,6 +221,11 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
> if (Op32 == -1)
> continue;
>
> + // Can't shrink an instruction if the e32 version doesn't exist
> + // for this GPU family.
> + if (AMDGPUMCInstLower::getMCOpcode(Op32, TRI.ST.getGeneration()) == 65535)
> + continue;
> +
We should wrap AMDGPU::getVOPe32() with a new function in SIInstrInfo that includes this
check. Would it also be possible to move the getMCOpcode function to SIInstrInfo?
> if (TII->isVOPC(Op32)) {
> unsigned DstReg = MI.getOperand(0).getReg();
> if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
> --
> 2.1.0
>
> From d1d525333b691f456bc08a98dac2be33e15771f9 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Fri, 12 Dec 2014 11:07:30 +0100
> Subject: [PATCH 4/6] R600/SI: Add V_READLANE_B32 and V_WRITELANE_B32 for VI
>
> These are VOP3-only on VI.
>
> The new multiclass doesn't define VOP3 versions of VOP2 instructions.
LGTM.
> From aa6f2d3be38c60ab9b7709be9e208ecd284a61f8 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Sun, 14 Dec 2014 22:26:00 +0100
> Subject: [PATCH 5/6] R600/SI: Use 64-bit encoding by default for opcodes that
> are VOP3-only on VI
>
We should be doing this for all instructions anyway. LGTM.
> From 8ed81aab5e2320baa2c99b16850fc06572b21b63 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Sun, 14 Dec 2014 19:27:17 +0100
> Subject: [PATCH 6/6] R600/SI: Unify VOP2 instructions which are VOP3-only on
> VI
>
> This removes some duplicated classes and definitions.
>
> These instructions are defined:
> _e32 // pseudo
> _e32_si
> _e64 // pseudo
> _e64_si
> _e64_vi
>
> I hope the shrinking pass doesn't operate on pseudo instructions.
It is supposed to. I need more time to review this patch.
-Tom
> ---
> lib/Target/R600/SIInstrInfo.td | 81 +++++++++++++++++++++------------------
> lib/Target/R600/SIInstructions.td | 23 ++++++-----
> lib/Target/R600/VIInstructions.td | 29 +-------------
> 3 files changed, 56 insertions(+), 77 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index e93b8b4..ff9062f 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -36,6 +36,12 @@ class vop2 <bits<6> si, bits<6> vi = si> : vop {
> field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
> }
>
> +// Specify a VOP2 opcode for SI and VOP3 opcode for VI
> +// that doesn't have VOP2 encoding on VI
> +class vop23 <bits<6> si, bits<10> vi> : vop2 <si> {
> + let VI3 = vi;
> +}
> +
> class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
> let SI3 = si;
> let VI3 = vi;
> @@ -818,6 +824,16 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> let isPseudo = 1;
> }
>
> +multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
> + string opName, string revOpSI> {
> + def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
> + VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>;
> +
> + def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
> + VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>,
> + SIMCInstr <opName#"_e32", SISubtarget.SI>;
> +}
> +
> multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
> string opName, string revOpSI, string revOpVI> {
> def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
> @@ -859,16 +875,6 @@ class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
> VOP3e_vi <op>,
> SIMCInstr <opName#"_e64", SISubtarget.VI>;
>
> -// VI only instruction
> -class VOP3_vi <bits<10> op, string opName, dag outs, dag ins, string asm,
> - list<dag> pattern, int NumSrcArgs, bit HasMods = 1> :
> - VOP3Common <outs, ins, asm, pattern>,
> - VOP <opName>,
> - VOP3e_vi <op>,
> - VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
> - !if(!eq(NumSrcArgs, 2), 0, 1),
> - HasMods>;
> -
> multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
> string opName, int NumSrcArgs, bit HasMods = 1> {
>
> @@ -1060,6 +1066,33 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
> revOp, P.HasModifiers
> >;
>
> +// A VOP2 instruction that is VOP3-only on VI.
> +multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
> + dag ins32, string asm32, list<dag> pat32,
> + dag ins64, string asm64, list<dag> pat64,
> + string revOpSI, string revOpVI, bit HasMods> {
> + defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOpSI>;
> +
> + defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
> + revOpSI, revOpVI, HasMods>;
> +}
> +
> +multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
> + SDPatternOperator node = null_frag,
> + string revOpSI = opName, string revOpVI = revOpSI>
> + : VOP2_VI3_Helper <
> + op, opName, P.Outs,
> + P.Ins32, P.Asm32, [],
> + P.Ins64, P.Asm64,
> + !if(P.HasModifiers,
> + [(set P.DstVT:$dst,
> + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> + i1:$clamp, i32:$omod)),
> + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
> + [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
> + revOpSI, revOpVI, P.HasModifiers
> +>;
> +
> class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> VOPCCommon <ins, "", pattern>,
> VOP <opName>,
> @@ -1170,34 +1203,6 @@ multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
> P.NumSrcArgs, P.HasModifiers
> >;
>
> -class VOP3InstVI <bits<10> op, string opName, VOPProfile P,
> - SDPatternOperator node = null_frag> : VOP3_vi <
> - op, opName#"_vi", P.Outs, P.Ins64, opName#P.Asm64,
> - !if(!eq(P.NumSrcArgs, 3),
> - !if(P.HasModifiers,
> - [(set P.DstVT:$dst,
> - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> - i1:$clamp, i32:$omod)),
> - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
> - (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
> - [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
> - P.Src2VT:$src2))]),
> - !if(!eq(P.NumSrcArgs, 2),
> - !if(P.HasModifiers,
> - [(set P.DstVT:$dst,
> - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> - i1:$clamp, i32:$omod)),
> - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
> - [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
> - /* P.NumSrcArgs == 1 */,
> - !if(P.HasModifiers,
> - [(set P.DstVT:$dst,
> - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> - i1:$clamp, i32:$omod))))],
> - [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
> - P.NumSrcArgs, P.HasModifiers
> ->;
> -
> multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterClass arc,
> string opName, list<dag> pattern> :
> VOP3b_2_m <
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 825038e..1f8422c 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1534,30 +1534,33 @@ defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
> }
>
> } // End isCommutable = 1
> +} // End let SubtargetPredicate = SICI
>
> -defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "v_bfm_b32", VOP_I32_I32_I32,
> - AMDGPUbfm>;
> -defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
> -defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "v_mbcnt_lo_u32_b32",
> +defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
> + AMDGPUbfm
> +>;
> +defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
> VOP_I32_I32_I32
> >;
> -defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "v_mbcnt_hi_u32_b32",
> +defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
> VOP_I32_I32_I32
> >;
> -defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32",
> +defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
> + VOP_I32_I32_I32
> +>;
> +defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
> VOP_F32_F32_I32, AMDGPUldexp
> >;
>
> ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
> ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
> ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
> -defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32",
> +defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
> VOP_I32_F32_F32, int_SI_packf16
> >;
> ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
> ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
>
> -} // End let SubtargetPredicate = SICI
> //===----------------------------------------------------------------------===//
> // VOP3 Instructions
> //===----------------------------------------------------------------------===//
> @@ -2667,16 +2670,12 @@ def : Pat <
> (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
> >;
>
> -let Predicates = [isSICI] in {
> -
> def : Pat <
> (int_SI_tid),
> (V_MBCNT_HI_U32_B32_e64 0xffffffff,
> (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
> >;
>
> -}
> -
> //===----------------------------------------------------------------------===//
> // VOP3 Patterns
> //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/VIInstructions.td b/lib/Target/R600/VIInstructions.td
> index 733a66b..083b922 100644
> --- a/lib/Target/R600/VIInstructions.td
> +++ b/lib/Target/R600/VIInstructions.td
> @@ -11,22 +11,6 @@
>
> let SubtargetPredicate = isVI in {
>
> -def V_LDEXP_F32 : VOP3InstVI <0x288, "v_ldexp_f32", VOP_F32_F32_I32,
> - AMDGPUldexp
> ->;
> -def V_BFM_B32 : VOP3InstVI <0x293, "v_bfm_b32", VOP_I32_I32_I32, AMDGPUbfm>;
> -def V_BCNT_U32_B32 : VOP3InstVI <0x28b, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
> -def V_MBCNT_LO_U32_B32 : VOP3InstVI <0x28c, "v_mbcnt_lo_u32_b32",
> - VOP_I32_I32_I32
> ->;
> -def V_MBCNT_HI_U32_B32 : VOP3InstVI <0x28d, "v_mbcnt_hi_u32_b32",
> - VOP_I32_I32_I32
> ->;
> -
> -def V_CVT_PKRTZ_F16_F32 : VOP3InstVI <0x296, "v_cvt_pkrtz_f16_f32",
> - VOP_I32_F32_F32, int_SI_packf16
> ->;
> -
> defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi <
> 0x14, "buffer_load_dword", VReg_32, i32, global_load
> >;
> @@ -37,22 +21,13 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <
>
> } // End SubtargetPredicate = isVI
>
> -//===----------------------------------------------------------------------===//
> -// VOP2 Patterns
> -//===----------------------------------------------------------------------===//
> -
> -let Predicates = [isVI] in {
> -
> -def : Pat <
> - (int_SI_tid),
> - (V_MBCNT_HI_U32_B32 0xffffffff,
> - (V_MBCNT_LO_U32_B32 0xffffffff, 0))
> ->;
>
> //===----------------------------------------------------------------------===//
> // SMEM Patterns
> //===----------------------------------------------------------------------===//
>
> +let Predicates = [isVI] in {
> +
> // 1. Offset as 8bit DWORD immediate
> def : Pat <
> (SIload_constant v4i32:$sbase, IMM20bit:$offset),
> --
> 2.1.0
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list