[PATCHES] R600/SI: VI fixes for bit shifts, GS

Tom Stellard tom at stellard.net
Wed Jan 28 11:58:49 PST 2015


On Tue, Jan 27, 2015 at 11:16:55PM +0100, Marek Olšák wrote:
> Hi,
> 
> This is another set of fixes for VI. Patches 1-2, 5-6 fix real issues.
> Patches 3-4, 7-8 are mostly cosmetic. Only patch 1 should fix an issue
> that is reproducible by piglit.
> 
> I couldn't test these, because my VI hw is very unstable. I'll try and
> test Bonaire tomorrow. That said, I'm pretty sure patches 1-7 are
> important improvements over the current state. I'm not sure about
> patch 8.
> 
> Please review.
> 
> Michel, would you be so kind as to test whether the first patch
> fixes the GS hang? Sorry, I'm not able to tell the difference. Please
> apply patch 1 alone and please don't update your LLVM repo (just in
> case it uncovers some other bug).
> 
> Thank you very much,
> 
> Marek

> From 4740298959a4ebb361415019eaa15d899c80614e Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Mon, 26 Jan 2015 16:54:35 +0100
> Subject: [PATCH 1/8] R600/SI: Fix dependency between an instruction writing M0
>  and S_SENDMSG on VI
> 
> This fixes a hang when using an empty geometry shader.
> ---
>  lib/Target/R600/SIInsertWaits.cpp       | 33 +++++++++++++++++++++++++++++++++
>  test/CodeGen/R600/llvm.SI.sendmsg-m0.ll | 25 +++++++++++++++++++++++++
>  2 files changed, 58 insertions(+)
>  create mode 100644 test/CodeGen/R600/llvm.SI.sendmsg-m0.ll
> 
> diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
> index 181b116..6075001 100644
> --- a/lib/Target/R600/SIInsertWaits.cpp
> +++ b/lib/Target/R600/SIInsertWaits.cpp
> @@ -82,6 +82,8 @@ private:
>    /// \brief Type of the last opcode.
>    InstType LastOpcodeType;
>  
> +  bool LastInstWritesM0;
> +
>    /// \brief Get increment/decrement amount for this instruction.
>    Counters getHwCounts(MachineInstr &MI);
>  
> @@ -106,6 +108,9 @@ private:
>    /// \brief Resolve all operand dependencies to counter requirements
>    Counters handleOperands(MachineInstr &MI);
>  
> +  /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
> +  void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
> +
>  public:
>    SIInsertWaits(TargetMachine &tm) :
>      MachineFunctionPass(ID),
> @@ -403,6 +408,31 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
>    return Result;
>  }
>  
> +void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
> +                                  MachineBasicBlock::iterator I)
> +{

LLVM coding style: the opening brace goes on the same line as the function signature.  Otherwise, LGTM.

> +  if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
> +    return;
> +
> +  // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
> +  if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
> +    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
> +    LastInstWritesM0 = false;
> +    return;
> +  }
> +
> +  // Set whether this instruction sets M0
> +  LastInstWritesM0 = false;
> +
> +  unsigned NumOperands = I->getNumOperands();
> +  for (unsigned i = 0; i < NumOperands; i++) {
> +    const MachineOperand &Op = I->getOperand(i);
> +
> +    if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
> +      LastInstWritesM0 = true;
> +  }
> +}
> +
>  // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
>  // around other non-memory instructions.
>  bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
> @@ -417,6 +447,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
>    WaitedOn = ZeroCounts;
>    LastIssued = ZeroCounts;
>    LastOpcodeType = OTHER;
> +  LastInstWritesM0 = false;
>  
>    memset(&UsedRegs, 0, sizeof(UsedRegs));
>    memset(&DefinedRegs, 0, sizeof(DefinedRegs));
> @@ -433,6 +464,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
>          Changes |= insertWait(MBB, I, LastIssued);
>        else
>          Changes |= insertWait(MBB, I, handleOperands(*I));
> +
> +      handleSendMsg(MBB, I);
>        pushInstruction(MBB, I);
>      }
>  
> diff --git a/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll b/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll
> new file mode 100644
> index 0000000..4de8993
> --- /dev/null
> +++ b/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll
> @@ -0,0 +1,25 @@
> +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
> +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI %s
> +
> +; SI-LABEL: {{^}}main:
> +; SI: s_mov_b32 m0, s0
> +; SI-NEXT: s_sendmsg Gs_done(nop)
> +; SI-NEXT: s_endpgm
> +
> +; VI-LABEL: {{^}}main:
> +; VI: s_mov_b32 m0, s0
> +; VI-NEXT: s_nop 0
> +; VI-NEXT: s_sendmsg Gs_done(nop)
> +; VI-NEXT: s_endpgm
> +
> +define void @main(i32 inreg %a) #0 {
> +main_body:
> +  call void @llvm.SI.sendmsg(i32 3, i32 %a)
> +  ret void
> +}
> +
> +; Function Attrs: nounwind
> +declare void @llvm.SI.sendmsg(i32, i32) #1
> +
> +attributes #0 = { "ShaderType"="2" "unsafe-fp-math"="true" }
> +attributes #1 = { nounwind }
> -- 
> 2.1.0
> 

> From 259e212087782161d4b4ff069b768cd3f04ff0eb Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Mon, 26 Jan 2015 22:41:09 +0100
> Subject: [PATCH 2/8] R600/SI: Determine target-specific encoding of READLANE
>  and WRITELANE early
> 
> These are VOP2 on SI and VOP3 on VI, and their pseudos are neither, which can
> be a problem. In order to make isVOP2 and isVOP3 queries behave as expected,
> the encoding must be determined first.
> 
> This doesn't fix any known issue, but better safe than sorry.

Most games will hit this.  LGTM.

> ---
>  lib/Target/R600/SIRegisterInfo.cpp | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> index 380c98b..2bc6416 100644
> --- a/lib/Target/R600/SIRegisterInfo.cpp
> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> @@ -183,7 +183,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
>             Ctx.emitError("Ran out of VGPRs for spilling SGPR");
>          }
>  
> -        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
> +        BuildMI(*MBB, MI, DL,
> +                TII->get(TII->pseudoToMCOpcode(AMDGPU::V_WRITELANE_B32)),
> +                Spill.VGPR)
>                  .addReg(SubReg)
>                  .addImm(Spill.Lane);
>  
> @@ -217,7 +219,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
>            SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
>          }
>  
> -        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
> +        BuildMI(*MBB, MI, DL,
> +                TII->get(TII->pseudoToMCOpcode(AMDGPU::V_READLANE_B32)),
> +                SubReg)
>                  .addReg(Spill.VGPR)
>                  .addImm(Spill.Lane)
>                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
> -- 
> 2.1.0
> 

> From 9e1f9c0f62b6a1f54da6d2cdbbf09bb671902632 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Tue, 27 Jan 2015 13:04:32 +0100
> Subject: [PATCH 3/8] R600/SI: Trivial instruction definition corrections for
>  VI
> 
> - V_MAC_LEGACY_F32 exists on VI, but it's VOP3-only.
> 
> - Remove V_MUL_LO_U32, because it's identical to V_MUL_LO_I32.
>   Both instructions are even defined the same on VI.
> 

Is there any harm in keeping V_MUL_LO_U32?  It would be useful for the assembler.

> - Define CVT_PK opcodes which are different between SI and VI. These are
>   unused. The idea is to define all chip differences.
> ---
>  lib/Target/R600/SIInstrInfo.td    |  1 +
>  lib/Target/R600/SIInstructions.td | 39 ++++++++++++++++++++++++---------------
>  2 files changed, 25 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 852870e..a4e258e 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -786,6 +786,7 @@ def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
>  def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
>  def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
>  def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
> +def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
>  def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
>  def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
>    let Src0RC32 = VCSrc_32;
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 544ea3a..953c360 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1543,12 +1543,6 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m <
>  // These instructions only exist on SI and CI
>  let SubtargetPredicate = isSICI in {
>  
> -let isCommutable = 1 in {
> -defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
> -  VOP_F32_F32_F32
> ->;
> -} // End isCommutable = 1
> -
>  defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
>    VOP_F32_F32_F32, AMDGPUfmin_legacy
>  >;
> @@ -1569,6 +1563,12 @@ defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
>  } // End isCommutable = 1
>  } // End let SubtargetPredicate = SICI
>  
> +let isCommutable = 1 in {
> +defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
> +  VOP_F32_F32_F32
> +>;
> +} // End isCommutable = 1
> +
>  defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
>    AMDGPUbfm
>  >;
> @@ -1585,14 +1585,25 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
>    VOP_F32_F32_I32, AMDGPUldexp
>  >;
>  
> -////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
> -////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
> -////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
> +
> +defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
> +  VOP_I32_F32_I32>; // TODO: set "Uses = dst"
> +
> +defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32",
> +  VOP_I32_F32_F32
> +>;
> +defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32",
> +  VOP_I32_F32_F32
> +>;
>  defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
> - VOP_I32_F32_F32, int_SI_packf16
> +  VOP_I32_F32_F32, int_SI_packf16
> +>;
> +defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32",
> +  VOP_I32_I32_I32
> +>;
> +defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32",
> +  VOP_I32_I32_I32
>  >;
> -////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
> -////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
>  
>  //===----------------------------------------------------------------------===//
>  // VOP3 Instructions
> @@ -1732,9 +1743,7 @@ defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
>  
>  let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {
>  
> -defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169, 0x285>, "v_mul_lo_u32",
> -  VOP_I32_I32_I32
> ->;
> +// V_MUL_LO_U32 is identical to V_MUL_LO_I32
>  defm V_MUL_HI_U32 : VOP3Inst <vop3<0x16a, 0x286>, "v_mul_hi_u32",
>    VOP_I32_I32_I32
>  >;
> -- 
> 2.1.0
> 

> From 8a0f3ecec050dc28d8cce14531ae8720c08b5e17 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Wed, 14 Jan 2015 20:47:28 +0100
> Subject: [PATCH 4/8] R600/SI: Remove VOP2_REV definitions from target-specific
>  instructions
> 
> The getCommute* functions are only used with pseudos, so this commit doesn't
> change anything.
> 

LGTM.

> The issue with missing non-rev versions of shift instructions on VI will be
> fixed separately.
> ---
>  lib/Target/R600/SIInstrInfo.td    | 45 +++++++++++++++++----------------------
>  lib/Target/R600/SIInstructions.td |  9 +++-----
>  2 files changed, 22 insertions(+), 32 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index a4e258e..5699d49 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -850,25 +850,22 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>  }
>  
>  multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
> -                     string opName, string revOpSI> {
> +                     string opName, string revOp> {
>    def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
> -           VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>;
> +           VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
>  
>    def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
> -            VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>,
>              SIMCInstr <opName#"_e32", SISubtarget.SI>;
>  }
>  
>  multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
> -                   string opName, string revOpSI, string revOpVI> {
> +                   string opName, string revOp> {
>    def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
> -           VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>;
> +           VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
>  
>    def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
> -            VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>,
>              SIMCInstr <opName#"_e32", SISubtarget.SI>;
>    def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>,
> -            VOP2_REV<revOpVI#"_e32_vi", !eq(revOpVI, opName)>,
>              SIMCInstr <opName#"_e32", SISubtarget.VI>;
>  }
>  
> @@ -942,20 +939,18 @@ multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
>  }
>  
>  multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
> -                     list<dag> pattern, string opName, string revOpSI, string revOpVI,
> +                     list<dag> pattern, string opName, string revOp,
>                       bit HasMods = 1, bit UseFullOp = 0> {
>  
>    def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
> -           VOP2_REV<revOpSI#"_e64", !eq(revOpSI, opName)>;
> +           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
>  
>    def _si : VOP3_Real_si <op.SI3,
>                outs, ins, asm, opName>,
> -            VOP2_REV<revOpSI#"_e64_si", !eq(revOpSI, opName)>,
>              VOP3DisableFields<1, 0, HasMods>;
>  
>    def _vi : VOP3_Real_vi <op.VI3,
>                outs, ins, asm, opName>,
> -            VOP2_REV<revOpVI#"_e64_vi", !eq(revOpVI, opName)>,
>              VOP3DisableFields<1, 0, HasMods>;
>  }
>  
> @@ -971,14 +966,12 @@ multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
>    let sdst = SIOperand.VCC, Defs = [VCC] in {
>      def _si : VOP3b <op.SI3, outs, ins, asm, []>,
>                VOP3DisableFields<1, 0, HasMods>,
> -              SIMCInstr<opName#"_e64", SISubtarget.SI>,
> -              VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>;
> +              SIMCInstr<opName#"_e64", SISubtarget.SI>;
>  
>      // TODO: Do we need this VI variant here?
>      /*def _vi : VOP3b_vi <op.VI3, outs, ins, asm, []>,
>                VOP3DisableFields<1, 0, HasMods>,
> -              SIMCInstr<opName#"_e64", SISubtarget.VI>,
> -              VOP2_REV<revOp#"_e64_vi", !eq(revOp, opName)>;*/
> +              SIMCInstr<opName#"_e64", SISubtarget.VI>;*/
>    } // End sdst = SIOperand.VCC, Defs = [VCC]
>  }
>  
> @@ -1057,17 +1050,17 @@ multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
>  multiclass VOP2_Helper <vop2 op, string opName, dag outs,
>                          dag ins32, string asm32, list<dag> pat32,
>                          dag ins64, string asm64, list<dag> pat64,
> -                        string revOpSI, string revOpVI, bit HasMods> {
> -  defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOpSI, revOpVI>;
> +                        string revOp, bit HasMods> {
> +  defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
>  
>    defm _e64 : VOP3_2_m <op,
> -    outs, ins64, opName#"_e64"#asm64, pat64, opName, revOpSI, revOpVI, HasMods
> +    outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
>    >;
>  }
>  
>  multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
>                       SDPatternOperator node = null_frag,
> -                     string revOpSI = opName, string revOpVI = revOpSI> : VOP2_Helper <
> +                     string revOp = opName> : VOP2_Helper <
>    op, opName, P.Outs,
>    P.Ins32, P.Asm32, [],
>    P.Ins64, P.Asm64,
> @@ -1077,7 +1070,7 @@ multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
>                                        i1:$clamp, i32:$omod)),
>                   (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
>        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
> -  revOpSI, revOpVI, P.HasModifiers
> +  revOp, P.HasModifiers
>  >;
>  
>  multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
> @@ -1085,7 +1078,7 @@ multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
>                           dag ins64, string asm64, list<dag> pat64,
>                           string revOp, bit HasMods> {
>  
> -  defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp, revOp>;
> +  defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
>  
>    defm _e64 : VOP3b_2_m <op,
>      outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
> @@ -1111,16 +1104,16 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
>  multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
>                              dag ins32, string asm32, list<dag> pat32,
>                              dag ins64, string asm64, list<dag> pat64,
> -                            string revOpSI, string revOpVI, bit HasMods> {
> -  defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOpSI>;
> +                            string revOp, bit HasMods> {
> +  defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
>  
>    defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
> -                        revOpSI, revOpVI, HasMods>;
> +                        revOp, HasMods>;
>  }
>  
>  multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
>                            SDPatternOperator node = null_frag,
> -                          string revOpSI = opName, string revOpVI = revOpSI>
> +                          string revOp = opName>
>                            : VOP2_VI3_Helper <
>    op, opName, P.Outs,
>    P.Ins32, P.Asm32, [],
> @@ -1131,7 +1124,7 @@ multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
>                                        i1:$clamp, i32:$omod)),
>                   (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
>        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
> -  revOpSI, revOpVI, P.HasModifiers
> +  revOp, P.HasModifiers
>  >;
>  
>  class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 953c360..ca2abf8 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1457,22 +1457,19 @@ defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32,
>    AMDGPUumax
>  >;
>  
> -// No non-Rev Op on VI
>  defm V_LSHRREV_B32 : VOP2Inst <
>    vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
> -    "v_lshr_b32", "v_lshrrev_b32"
> +    "v_lshr_b32"
>  >;
>  
> -// No non-Rev OP on VI
>  defm V_ASHRREV_I32 : VOP2Inst <
>    vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
> -    "v_ashr_i32", "v_ashrrev_i32"
> +    "v_ashr_i32"
>  >;
>  
> -// No non-Rev OP on VI
>  defm V_LSHLREV_B32 : VOP2Inst <
>    vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
> -    "v_lshl_b32", "v_lshlrev_b32"
> +    "v_lshl_b32"
>  >;
>  
>  defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32",
> -- 
> 2.1.0
> 

> From 448f8a654ac800b80eba512545425bcb0e3f8cc9 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Tue, 27 Jan 2015 15:29:32 +0100
> Subject: [PATCH 5/8] R600/SI: Don't generate non-existent LSHL, LSHR, ASHR B32
>  variants on VI
> 
> This can happen when a REV instruction is commuted.
> 
> The trick is not to define the _vi versions of instructions, which has these
> consequences:
> - code generation will always fail if a pseudo cannot be lowered
>   (very useful to catch bugs where an unsupported instruction somehow makes
>    it to the printer)
> - ability to query if a pseudo can be lowered, which is done in commuteOpcode
>   to prevent REV from commuting to non-REV on VI

LGTM.

> ---
>  lib/Target/R600/SIInstrInfo.cpp   |  8 ++++++--
>  lib/Target/R600/SIInstrInfo.td    | 34 ++++++++++++++++++++++++++++++----
>  lib/Target/R600/SIInstructions.td | 10 +++++-----
>  test/CodeGen/R600/shl.ll          | 25 ++++++++++++++++++++++++-
>  test/CodeGen/R600/sra.ll          | 30 +++++++++++++++++++++++++++++-
>  5 files changed, 94 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 80b560e..53a1d8b 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -408,11 +408,15 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
>    int NewOpc;
>  
>    // Try to map original to commuted opcode
> -  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
> +  NewOpc = AMDGPU::getCommuteRev(Opcode);
> +  // Check if the commuted (REV) opcode exists on the target.
> +  if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
>      return NewOpc;
>  
>    // Try to map commuted to original opcode
> -  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
> +  NewOpc = AMDGPU::getCommuteOrig(Opcode);
> +  // Check if the original (non-REV) opcode exists on the target.
> +  if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
>      return NewOpc;
>  
>    return Opcode;
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 5699d49..fd0dfd3 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -945,13 +945,24 @@ multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
>    def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
>             VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
>  
> -  def _si : VOP3_Real_si <op.SI3,
> -              outs, ins, asm, opName>,
> +  def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
>              VOP3DisableFields<1, 0, HasMods>;
>  
> -  def _vi : VOP3_Real_vi <op.VI3,
> -              outs, ins, asm, opName>,
> +  def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
> +            VOP3DisableFields<1, 0, HasMods>;
> +}
> +
> +multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
> +                     list<dag> pattern, string opName, string revOp,
> +                     bit HasMods = 1, bit UseFullOp = 0> {
> +
> +  def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
> +           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
> +
> +  def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
>              VOP3DisableFields<1, 0, HasMods>;
> +
> +  // No VI instruction. This class is for SI only.
>  }
>  
>  multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
> @@ -1073,6 +1084,21 @@ multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
>    revOp, P.HasModifiers
>  >;
>  
> +multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
> +                       SDPatternOperator node = null_frag,
> +                       string revOp = opName> {
> +  defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
> +
> +  defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64,
> +    !if(P.HasModifiers,
> +        [(set P.DstVT:$dst,
> +             (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
> +                                        i1:$clamp, i32:$omod)),
> +                   (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
> +        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
> +    opName, revOp, P.HasModifiers>;
> +}
> +
>  multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
>                           dag ins32, string asm32, list<dag> pat32,
>                           dag ins64, string asm64, list<dag> pat64,
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index ca2abf8..e62306d 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1540,21 +1540,21 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m <
>  // These instructions only exist on SI and CI
>  let SubtargetPredicate = isSICI in {
>  
> -defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
> +defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
>    VOP_F32_F32_F32, AMDGPUfmin_legacy
>  >;
> -defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
> +defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
>    VOP_F32_F32_F32, AMDGPUfmax_legacy
>  >;
>  
>  let isCommutable = 1 in {
> -defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
> -defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
> +defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
> +defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32",
>    VOP_I32_I32_I32, sra
>  >;
>  
>  let hasPostISelHook = 1 in {
> -defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
> +defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
>  }
>  
>  } // End isCommutable = 1
> diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll
> index 75341a2..ff2f096 100644
> --- a/test/CodeGen/R600/shl.ll
> +++ b/test/CodeGen/R600/shl.ll
> @@ -1,6 +1,6 @@
>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
>  ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
> -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
> +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s
>  
>  ;EG-CHECK: {{^}}shl_v2i32:
>  ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> @@ -10,6 +10,10 @@
>  ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
>  ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
>  
> +;VI-CHECK: {{^}}shl_v2i32:
> +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +
>  define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
>    %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
>    %a = load <2 x i32> addrspace(1) * %in
> @@ -31,6 +35,12 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
>  ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
>  ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
>  
> +;VI-CHECK: {{^}}shl_v4i32:
> +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +
>  define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
>    %a = load <4 x i32> addrspace(1) * %in
> @@ -55,6 +65,9 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
>  ;SI-CHECK: {{^}}shl_i64:
>  ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
> +;VI-CHECK: {{^}}shl_i64:
> +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +
>  define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
>    %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
>    %a = load i64 addrspace(1) * %in
> @@ -90,6 +103,10 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
>  ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
> +;VI-CHECK: {{^}}shl_v2i64:
> +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +
>  define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
>    %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
>    %a = load <2 x i64> addrspace(1) * %in
> @@ -147,6 +164,12 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
>  ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
> +;VI-CHECK: {{^}}shl_v4i64:
> +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +
>  define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
>    %a = load <4 x i64> addrspace(1) * %in
> diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
> index f062e4c..44c1101 100644
> --- a/test/CodeGen/R600/sra.ll
> +++ b/test/CodeGen/R600/sra.ll
> @@ -1,6 +1,6 @@
>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
>  ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
> -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
> +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s
>  
>  ;EG-CHECK-LABEL: {{^}}ashr_v2i32:
>  ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> @@ -10,6 +10,10 @@
>  ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
>  ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
>  
> +;VI-CHECK-LABEL: {{^}}ashr_v2i32:
> +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +
>  define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
>    %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
>    %a = load <2 x i32> addrspace(1) * %in
> @@ -31,6 +35,12 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
>  ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
>  ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
>  
> +;VI-CHECK-LABEL: {{^}}ashr_v4i32:
> +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
> +
>  define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
>    %a = load <4 x i32> addrspace(1) * %in
> @@ -45,6 +55,10 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
>  
>  ;SI-CHECK-LABEL: {{^}}ashr_i64:
>  ;SI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
> +
> +;VI-CHECK-LABEL: {{^}}ashr_i64:
> +;VI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
> +
>  define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
>  entry:
>    %0 = sext i32 %in to i64
> @@ -69,6 +83,10 @@ entry:
>  
>  ;SI-CHECK-LABEL: {{^}}ashr_i64_2:
>  ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +
> +;VI-CHECK-LABEL: {{^}}ashr_i64_2:
> +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +
>  define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
>  entry:
>    %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
> @@ -109,6 +127,10 @@ entry:
>  ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
> +;VI-CHECK-LABEL: {{^}}ashr_v2i64:
> +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +
>  define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
>    %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
>    %a = load <2 x i64> addrspace(1) * %in
> @@ -174,6 +196,12 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
>  ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
> +;VI-CHECK-LABEL: {{^}}ashr_v4i64:
> +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +
>  define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
>    %a = load <4 x i64> addrspace(1) * %in
> -- 
> 2.1.0
> 

> From cefe07504b0534fc864d2ec2189423a8208a1501 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Tue, 27 Jan 2015 20:34:20 +0100
> Subject: [PATCH 6/8] R600/SI: Fix B64 VALU shifts on VI
> 
> SI only has standard versions. VI only has REV versions.

LGTM.

> ---
>  lib/Target/R600/SIInstrInfo.cpp   | 18 ++++++++++++++++++
>  lib/Target/R600/SIInstrInfo.td    |  1 +
>  lib/Target/R600/SIInstructions.td | 14 ++++++++++++++
>  test/CodeGen/R600/rotl.i64.ll     | 28 +++++++++++++++-------------
>  test/CodeGen/R600/rotr.i64.ll     | 28 +++++++++++++++-------------
>  test/CodeGen/R600/shl.ll          | 14 +++++++-------
>  test/CodeGen/R600/sra.ll          | 14 +++++++-------
>  7 files changed, 77 insertions(+), 40 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 53a1d8b..bf8d589 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -2047,6 +2047,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
>          swapOperands(Inst);
>        }
>        break;
> +    case AMDGPU::S_LSHL_B64:
> +      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
> +        NewOpcode = AMDGPU::V_LSHLREV_B64;
> +        swapOperands(Inst);
> +      }
> +      break;
> +    case AMDGPU::S_ASHR_I64:
> +      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
> +        NewOpcode = AMDGPU::V_ASHRREV_I64;
> +        swapOperands(Inst);
> +      }
> +      break;
> +    case AMDGPU::S_LSHR_B64:
> +      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
> +        NewOpcode = AMDGPU::V_LSHRREV_B64;
> +        swapOperands(Inst);
> +      }
> +      break;
>  
>      case AMDGPU::S_BFE_U64:
>      case AMDGPU::S_BFM_B64:
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index fd0dfd3..2cd5adc 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -803,6 +803,7 @@ def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
>  }
>  
>  def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
> +def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
>  def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
>  
>  def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index e62306d..19710a3 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1803,6 +1803,20 @@ defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
>  
>  } // End SubtargetPredicate = isSICI
>  
> +let SubtargetPredicate = isVI in {
> +
> +defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64",
> +  VOP_I64_I32_I64
> +>;
> +defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64",
> +  VOP_I64_I32_I64
> +>;
> +defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
> +  VOP_I64_I32_I64
> +>;
> +
> +} // End SubtargetPredicate = isVI
> +
>  //===----------------------------------------------------------------------===//
>  // Pseudo Instructions
>  //===----------------------------------------------------------------------===//
> diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/R600/rotl.i64.ll
> index f094ece..6da17a4 100644
> --- a/test/CodeGen/R600/rotl.i64.ll
> +++ b/test/CodeGen/R600/rotl.i64.ll
> @@ -1,12 +1,12 @@
> -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s
> +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s
>  
> -; FUNC-LABEL: {{^}}s_rotl_i64:
> -; SI-DAG: s_lshl_b64
> -; SI-DAG: s_sub_i32
> -; SI-DAG: s_lshr_b64
> -; SI: s_or_b64
> -; SI: s_endpgm
> +; BOTH-LABEL: {{^}}s_rotl_i64:
> +; BOTH-DAG: s_lshl_b64
> +; BOTH-DAG: s_sub_i32
> +; BOTH-DAG: s_lshr_b64
> +; BOTH: s_or_b64
> +; BOTH: s_endpgm
>  define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
>  entry:
>    %0 = shl i64 %x, %y
> @@ -17,13 +17,15 @@ entry:
>    ret void
>  }
>  
> -; FUNC-LABEL: {{^}}v_rotl_i64:
> +; BOTH-LABEL: {{^}}v_rotl_i64:
>  ; SI-DAG: v_lshl_b64
> -; SI-DAG: v_sub_i32
> +; VI-DAG: v_lshlrev_b64
> +; BOTH-DAG: v_sub_i32
>  ; SI: v_lshr_b64
> -; SI: v_or_b32
> -; SI: v_or_b32
> -; SI: s_endpgm
> +; VI: v_lshrrev_b64
> +; BOTH: v_or_b32
> +; BOTH: v_or_b32
> +; BOTH: s_endpgm
>  define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
>  entry:
>    %x = load i64 addrspace(1)* %xptr, align 8
> diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/R600/rotr.i64.ll
> index a637f71..f1d1d26 100644
> --- a/test/CodeGen/R600/rotr.i64.ll
> +++ b/test/CodeGen/R600/rotr.i64.ll
> @@ -1,11 +1,11 @@
> -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s
> +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s
>  
> -; FUNC-LABEL: {{^}}s_rotr_i64:
> -; SI-DAG: s_sub_i32
> -; SI-DAG: s_lshr_b64
> -; SI-DAG: s_lshl_b64
> -; SI: s_or_b64
> +; BOTH-LABEL: {{^}}s_rotr_i64:
> +; BOTH-DAG: s_sub_i32
> +; BOTH-DAG: s_lshr_b64
> +; BOTH-DAG: s_lshl_b64
> +; BOTH: s_or_b64
>  define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
>  entry:
>    %tmp0 = sub i64 64, %y
> @@ -16,12 +16,14 @@ entry:
>    ret void
>  }
>  
> -; FUNC-LABEL: {{^}}v_rotr_i64:
> -; SI-DAG: v_sub_i32
> +; BOTH-LABEL: {{^}}v_rotr_i64:
> +; BOTH-DAG: v_sub_i32
>  ; SI-DAG: v_lshr_b64
>  ; SI-DAG: v_lshl_b64
> -; SI: v_or_b32
> -; SI: v_or_b32
> +; VI-DAG: v_lshrrev_b64
> +; VI-DAG: v_lshlrev_b64
> +; BOTH: v_or_b32
> +; BOTH: v_or_b32
>  define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
>  entry:
>    %x = load i64 addrspace(1)* %xptr, align 8
> @@ -34,7 +36,7 @@ entry:
>    ret void
>  }
>  
> -; FUNC-LABEL: {{^}}s_rotr_v2i64:
> +; BOTH-LABEL: {{^}}s_rotr_v2i64:
>  define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) {
>  entry:
>    %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
> @@ -45,7 +47,7 @@ entry:
>    ret void
>  }
>  
> -; FUNC-LABEL: {{^}}v_rotr_v2i64:
> +; BOTH-LABEL: {{^}}v_rotr_v2i64:
>  define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
>  entry:
>    %x = load <2 x i64> addrspace(1)* %xptr, align 8
> diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll
> index ff2f096..c6a18bf 100644
> --- a/test/CodeGen/R600/shl.ll
> +++ b/test/CodeGen/R600/shl.ll
> @@ -66,7 +66,7 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
>  ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
>  ;VI-CHECK: {{^}}shl_i64:
> -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
>  
>  define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
>    %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
> @@ -104,8 +104,8 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
>  ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
>  ;VI-CHECK: {{^}}shl_v2i64:
> -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
> +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
>  
>  define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
>    %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
> @@ -165,10 +165,10 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
>  ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
>  ;VI-CHECK: {{^}}shl_v4i64:
> -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> -;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
> +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
> +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
> +;VI-CHECK: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
>  
>  define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
> diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
> index 44c1101..7b461ca 100644
> --- a/test/CodeGen/R600/sra.ll
> +++ b/test/CodeGen/R600/sra.ll
> @@ -85,7 +85,7 @@ entry:
>  ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
>  ;VI-CHECK-LABEL: {{^}}ashr_i64_2:
> -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
>  
>  define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
>  entry:
> @@ -128,8 +128,8 @@ entry:
>  ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
>  ;VI-CHECK-LABEL: {{^}}ashr_v2i64:
> -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
> +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
>  
>  define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
>    %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
> @@ -197,10 +197,10 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
>  ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
>  
>  ;VI-CHECK-LABEL: {{^}}ashr_v4i64:
> -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> -;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
> +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
> +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
> +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
> +;VI-CHECK: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
>  
>  define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
> -- 
> 2.1.0
> 

> From 4457a80bbb0972a530a1294179347b6e99bfa21c Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Tue, 27 Jan 2015 16:28:47 +0100
> Subject: [PATCH 7/8] R600/SI: Rewrite VOP1InstSI to contain a pseudo and _si
>  opcode
> 
> With this change, if one of these instructions is accidentally selected on
> VI, code generation will fail, because the pseudo -> _vi mapping is
> undefined.
> 
> The idea is to be able to catch possible future bugs easily.

LGTM.

> ---
>  lib/Target/R600/SIInstrInfo.td | 30 +++++++++++++++++++++++-------
>  1 file changed, 23 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 2cd5adc..c16b84b 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -843,6 +843,15 @@ multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
>              SIMCInstr <opName#"_e32", SISubtarget.VI>;
>  }
>  
> +multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
> +                   string opName> {
> +  def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
> +
> +  def _si : VOP1<op.SI, outs, ins, asm, []>,
> +            SIMCInstr <opName#"_e32", SISubtarget.SI>;
> +  // No VI instruction. This class is for SI only.
> +}
> +
>  class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
>    VOP2Common <outs, ins, "", pattern>,
>    VOP <opName>,
> @@ -939,6 +948,16 @@ multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
>              VOP3DisableFields<0, 0, HasMods>;
>  }
>  
> +multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm,
> +                     list<dag> pattern, string opName, bit HasMods = 1> {
> +
> +  def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
> +
> +  def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
> +            VOP3DisableFields<0, 0, HasMods>;
> +  // No VI instruction. This class is for SI only.
> +}
> +
>  multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
>                       list<dag> pattern, string opName, string revOp,
>                       bit HasMods = 1, bit UseFullOp = 0> {
> @@ -1046,17 +1065,14 @@ multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
>  multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
>                         SDPatternOperator node = null_frag> {
>  
> -  def _e32 : VOP1 <op.SI, P.Outs, P.Ins32, opName#P.Asm32, []>,
> -             VOP <opName>;
> +  defm _e32 : VOP1SI_m <op, P.Outs, P.Ins32, opName#P.Asm32, [], opName>;
>  
> -  def _e64 : VOP3Common <P.Outs, P.Ins64, opName#P.Asm64,
> +  defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64,
>      !if(P.HasModifiers,
>        [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
>                                  i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
> -      [(set P.DstVT:$dst, (node P.Src0VT:$src0))])>,
> -            VOP <opName>,
> -            VOP3e <op.SI3>,
> -            VOP3DisableFields<0, 0, P.HasModifiers>;
> +      [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
> +    opName, P.HasModifiers>;
>  }
>  
>  multiclass VOP2_Helper <vop2 op, string opName, dag outs,
> -- 
> 2.1.0
> 

> From 86cdd84c7a4ba10d09d8186cf80a881521681c7e Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Tue, 27 Jan 2015 18:57:55 +0100
> Subject: [PATCH 8/8] R600/SI: Remove useless patterns in VALU which are
>  already covered by SALU
> 
> Also remove hasPostISelHook=1 from V_LSHL_B32. It's defined by InstSI already.

LGTM.

> ---
>  lib/Target/R600/SIInstructions.td | 61 ++++++++++-----------------------------
>  1 file changed, 16 insertions(+), 45 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 19710a3..0d11000 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1444,18 +1444,10 @@ defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
>    fminnum>;
>  defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
>    fmaxnum>;
> -defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32,
> -  AMDGPUsmin
> ->;
> -defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32,
> -  AMDGPUsmax
> ->;
> -defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32,
> -  AMDGPUumin
> ->;
> -defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32,
> -  AMDGPUumax
> ->;
> +defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>;
> +defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>;
> +defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>;
> +defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>;
>  
>  defm V_LSHRREV_B32 : VOP2Inst <
>    vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
> @@ -1472,14 +1464,9 @@ defm V_LSHLREV_B32 : VOP2Inst <
>      "v_lshl_b32"
>  >;
>  
> -defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32",
> -  VOP_I32_I32_I32, and>;
> -defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32",
> -  VOP_I32_I32_I32, or
> ->;
> -defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32",
> -  VOP_I32_I32_I32, xor
> ->;
> +defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
> +defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
> +defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
>  
>  defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
>  } // End isCommutable = 1
> @@ -1499,9 +1486,7 @@ let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
>  defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
>    VOP_I32_I32_I32, add
>  >;
> -defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32",
> -  VOP_I32_I32_I32, sub
> ->;
> +defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
>  
>  defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
>    VOP_I32_I32_I32, null_frag, "v_sub_i32"
> @@ -1509,10 +1494,10 @@ defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
>  
>  let Uses = [VCC] in { // Carry-in comes from VCC
>  defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
> -  VOP_I32_I32_I32_VCC, adde
> +  VOP_I32_I32_I32_VCC
>  >;
>  defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
> -  VOP_I32_I32_I32_VCC, sube
> +  VOP_I32_I32_I32_VCC
>  >;
>  defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
>    VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
> @@ -1548,15 +1533,9 @@ defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
>  >;
>  
>  let isCommutable = 1 in {
> -defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
> -defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32",
> -  VOP_I32_I32_I32, sra
> ->;
> -
> -let hasPostISelHook = 1 in {
> -defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
> -}
> -
> +defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>;
> +defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>;
> +defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
>  } // End isCommutable = 1
>  } // End let SubtargetPredicate = SICI
>  
> @@ -1786,17 +1765,9 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
>  // These instructions only exist on SI and CI
>  let SubtargetPredicate = isSICI in {
>  
> -defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
> -  VOP_I64_I64_I32, shl
> ->;
> -
> -defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
> -  VOP_I64_I64_I32, srl
> ->;
> -
> -defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
> -  VOP_I64_I64_I32, sra
> ->;
> +defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32>;
> +defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", VOP_I64_I64_I32>;
> +defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32>;
>  
>  defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
>    VOP_F32_F32_F32_F32>;
> -- 
> 2.1.0
> 

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits





More information about the llvm-commits mailing list