[llvm] r284980 - AMDGPU: Fix Two Address problems with v_movreld

Mon Oct 24 07:56:02 PDT 2016

Author: nha
Date: Mon Oct 24 09:56:02 2016
New Revision: 284980

URL: http://llvm.org/viewvc/llvm-project?rev=284980&view=rev
Log:
AMDGPU: Fix Two Address problems with v_movreld

Summary:
The v_movreld machine instruction is used with three operands that are
in a sense tied to each other (the explicit VGPR_32 def and the implicit
VGPR_NN def and use). There is no way to express that using the currently
available operand bits, and indeed there are cases where the Two Address
instructions pass does the wrong thing.

This patch introduces a new set of pseudo instructions that are identical
in intended semantics as v_movreld, but they only have two tied operands.

Having to add a new set of pseudo instructions is admittedly annoying, but
it's a fairly straightforward and solid approach. The only alternative I
see is to try to teach the Two Address instructions pass about Three Address
instructions, and I'm afraid that's trickier and is going to end up more
fragile.

Note that v_movrels does not suffer from this problem, and so this patch
does not touch it.

This fixes several GL45-CTS.shaders.indexing.* tests.

Reviewers: tstellarAMD, arsenm

Subscribers: kzhuravl, wdng, yaxunl, llvm-commits, tony-tye

Differential Revision: https://reviews.llvm.org/D25633

Added:
    llvm/trunk/test/CodeGen/AMDGPU/movreld-bug.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=284980&r1=284979&r2=284980&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Oct 24 09:56:02 2016
@@ -1451,6 +1451,23 @@ static MachineBasicBlock *emitIndirectSr
   return LoopBB;
 }
 
+static unsigned getMOVRELDPseudo(const TargetRegisterClass *VecRC) {
+  switch (VecRC->getSize()) {
+  case 4:
+    return AMDGPU::V_MOVRELD_B32_V1;
+  case 8:
+    return AMDGPU::V_MOVRELD_B32_V2;
+  case 16:
+    return AMDGPU::V_MOVRELD_B32_V4;
+  case 32:
+    return AMDGPU::V_MOVRELD_B32_V8;
+  case 64:
+    return AMDGPU::V_MOVRELD_B32_V16;
+  default:
+    llvm_unreachable("unsupported size for MOVRELD pseudos");
+  }
+}
+
 static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
                                           MachineBasicBlock &MBB,
                                           const SISubtarget &ST) {
@@ -1504,20 +1521,13 @@ static MachineBasicBlock *emitIndirectDs
 
       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
     } else {
-      const MCInstrDesc &MovRelDesc = TII->get(AMDGPU::V_MOVRELD_B32_e32);
+      const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
 
-      MachineInstr *MovRel =
-        BuildMI(MBB, I, DL, MovRelDesc)
-        .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
-        .addOperand(*Val)
-        .addReg(Dst, RegState::ImplicitDefine)
-        .addReg(SrcVec->getReg(), RegState::Implicit);
-
-      const int ImpDefIdx = MovRelDesc.getNumOperands() +
-        MovRelDesc.getNumImplicitUses();
-      const int ImpUseIdx = ImpDefIdx + 1;
-
-      MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+      BuildMI(MBB, I, DL, MovRelDesc)
+          .addReg(Dst, RegState::Define)
+          .addReg(SrcVec->getReg())
+          .addOperand(*Val)
+          .addImm(SubReg - AMDGPU::sub0);
     }
 
     MI.eraseFromParent();
@@ -1555,20 +1565,13 @@ static MachineBasicBlock *emitIndirectDs
       .addReg(PhiReg, RegState::Implicit)
       .addReg(AMDGPU::M0, RegState::Implicit);
   } else {
-    const MCInstrDesc &MovRelDesc = TII->get(AMDGPU::V_MOVRELD_B32_e32);
-    // vdst is not actually read and just provides the base register index.
-    MachineInstr *MovRel =
-      BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
-    .addReg(PhiReg, RegState::Undef, SubReg) // vdst
-    .addOperand(*Val)
-    .addReg(Dst, RegState::ImplicitDefine)
-    .addReg(PhiReg, RegState::Implicit);
-
-    const int ImpDefIdx = MovRelDesc.getNumOperands() +
-      MovRelDesc.getNumImplicitUses();
-    const int ImpUseIdx = ImpDefIdx + 1;
+    const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
 
-    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+    BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
+        .addReg(Dst, RegState::Define)
+        .addReg(PhiReg)
+        .addOperand(*Val)
+        .addImm(SubReg - AMDGPU::sub0);
   }
 
   MI.eraseFromParent();

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=284980&r1=284979&r2=284980&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Mon Oct 24 09:56:02 2016
@@ -909,6 +909,32 @@ bool SIInstrInfo::expandPostRAPseudo(Mac
     MI.eraseFromParent();
     break;
   }
+  case AMDGPU::V_MOVRELD_B32_V1:
+  case AMDGPU::V_MOVRELD_B32_V2:
+  case AMDGPU::V_MOVRELD_B32_V4:
+  case AMDGPU::V_MOVRELD_B32_V8:
+  case AMDGPU::V_MOVRELD_B32_V16: {
+    const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
+    unsigned VecReg = MI.getOperand(0).getReg();
+    bool IsUndef = MI.getOperand(1).isUndef();
+    unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
+    assert(VecReg == MI.getOperand(1).getReg());
+
+    MachineInstr *MovRel =
+        BuildMI(MBB, MI, DL, MovRelDesc)
+            .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
+            .addOperand(MI.getOperand(2))
+            .addReg(VecReg, RegState::ImplicitDefine)
+            .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+
+    const int ImpDefIdx =
+        MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
+    const int ImpUseIdx = ImpDefIdx + 1;
+    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+
+    MI.eraseFromParent();
+    break;
+  }
   case AMDGPU::SI_PC_ADD_REL_OFFSET: {
     MachineFunction &MF = *MBB.getParent();
     unsigned Reg = MI.getOperand(0).getReg();

Modified: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td?rev=284980&r1=284979&r2=284980&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td Mon Oct 24 09:56:02 2016
@@ -538,6 +538,26 @@ def V_MOV_B32_indirect : VPseudoInstSI<(
   let VOP1 = 1;
 }
 
+// This is a pseudo variant of the v_movreld_b32 instruction in which the
+// vector operand appears only twice, once as def and once as use. Using this
+// pseudo avoids problems with the Two Address instructions pass.
+class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
+  (outs rc:$vdst),
+  (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
+  let VOP1 = 1;
+
+  let Constraints = "$vsrc = $vdst";
+  let Uses = [M0, EXEC];
+
+  let SubtargetPredicate = HasMovrel;
+}
+
+def V_MOVRELD_B32_V1 : V_MOVRELD_B32_pseudo<VGPR_32>;
+def V_MOVRELD_B32_V2 : V_MOVRELD_B32_pseudo<VReg_64>;
+def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
+def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
+def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
+
 let Predicates = [isVI] in {
 
 def : Pat <

Added: llvm/trunk/test/CodeGen/AMDGPU/movreld-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/movreld-bug.ll?rev=284980&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/movreld-bug.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/movreld-bug.ll Mon Oct 24 09:56:02 2016
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}main:
+; GCN: v_movreld_b32_e32 v0,
+; GCN: v_mov_b32_e32 v0, v1
+; GCN: ; return
+define amdgpu_ps float @main(i32 inreg %arg) #0 {
+main_body:
+  %tmp24 = insertelement <2 x float> undef, float 0.000000e+00, i32 %arg
+  %tmp25 = extractelement <2 x float> %tmp24, i32 1
+  ret float %tmp25
+}
+
+attributes #0 = { "InitialPSInputAddr"="36983" }