[PATCH] R600/SI: Remove the M0Reg register class

Wed Jun 11 13:56:31 PDT 2014

This class only contined one register, M0, and was used to ensure
some operands always read from the M0 register.  With the
MachineScheduler enabled we sometimes run into a situation where we
have to spill a M0Reg, which is not ideal, so instead of using this
special class, we use a custom inserter to force operands to M0.
---
 lib/Target/R600/SIFixSGPRCopies.cpp |  1 -
 lib/Target/R600/SIISelLowering.cpp  | 17 +++++++++++++++++
 lib/Target/R600/SIInstructions.td   | 34 +++++++++++++++++++++++-----------
 lib/Target/R600/SIRegisterInfo.td   |  5 ++---
 4 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index 5f71453..782f651 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -185,7 +185,6 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
   const TargetRegisterClass *SrcRC;
 
   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-      DstRC == &AMDGPU::M0RegRegClass ||
       MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
     return false;
 
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 608aad2..01ce64e 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -545,6 +545,23 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
             .addImm(1) // CLAMP
             .addImm(0); // OMOD
     MI->eraseFromParent();
+    break;
+  }
+  case AMDGPU::SI_MOV_M0: {
+    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+            .addOperand(MI->getOperand(1));
+
+    unsigned DstReg = MI->getOperand(0).getReg();
+    for (MachineOperand &MO : MRI.use_operands(DstReg)) {
+      // Clear the kill flag from each use.  Since we are potentailly inserting
+      // M0 uses into more than one instruction, then we need to remove the
+      // kill flag otherwise all uses after the first will be considered dead,
+      // and the machine verifier will complain.
+      MO.setIsKill(false);
+    }
+    MRI.replaceRegWith(MI->getOperand(0).getReg(), AMDGPU::M0);
+    MI->eraseFromParent();
+    break;
   }
   }
   return BB;
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 475f28e..8080d5b 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -434,11 +434,12 @@ def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
 //def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
 
 let Uses = [EXEC] in {
-  def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
-      [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
+
+def S_SENDMSG : SOPP <
+  0x00000010, (ins SendMsgImm:$simm16, SReg_32:$m0), "S_SENDMSG $simm16", []
   > {
-    let DisableEncoding = "$m0";
-  }
+  let DisableEncoding = "$m0";
+}
 } // End Uses = [EXEC]
 
 //def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
@@ -1064,7 +1065,7 @@ defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
 def V_INTERP_P1_F32 : VINTRP <
   0x00000000,
   (outs VReg_32:$dst),
-  (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+  (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, SReg_32:$m0),
   "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
   []> {
   let DisableEncoding = "$m0";
@@ -1073,7 +1074,7 @@ def V_INTERP_P1_F32 : VINTRP <
 def V_INTERP_P2_F32 : VINTRP <
   0x00000001,
   (outs VReg_32:$dst),
-  (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+  (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$m0),
   "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
   []> {
 
@@ -1085,7 +1086,7 @@ def V_INTERP_P2_F32 : VINTRP <
 def V_INTERP_MOV_F32 : VINTRP <
   0x00000002,
   (outs VReg_32:$dst),
-  (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+  (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, SReg_32:$m0),
   "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
   []> {
   let DisableEncoding = "$m0";
@@ -1520,6 +1521,9 @@ def V_SUB_F64 : InstSI <
   []
 >;
 
+// Store a value in the M0 register
+def SI_MOV_M0 : InstSI <(outs SReg_32:$dst), (ins SReg_32:$src0), "", []>;
+
 } // end usesCustomInserter
 
 multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
@@ -1635,6 +1639,13 @@ def : Pat <
 >;
 
 //===----------------------------------------------------------------------===//
+// SOPP Patterns
+//===----------------------------------------------------------------------===//
+def : Pat <
+  (int_SI_sendmsg imm:$simm16, i32:$m0),
+  (S_SENDMSG imm:$simm16, (SI_MOV_M0 $m0))
+>;
+//===----------------------------------------------------------------------===//
 // VOP2 Patterns
 //===----------------------------------------------------------------------===//
 
@@ -1987,15 +1998,16 @@ def : Pat <
 
 def : Pat <
   (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
-  (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params)
+  (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (SI_MOV_M0 $params))
 >;
 
 def : Pat <
-  (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
+  (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
   (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
-                                    imm:$attr_chan, imm:$attr, i32:$params),
+                                    imm:$attr_chan, imm:$attr,
+				    (SI_MOV_M0 $params)),
                    (EXTRACT_SUBREG $ij, sub1),
-                   imm:$attr_chan, imm:$attr, $params)
+                   imm:$attr_chan, imm:$attr, (SI_MOV_M0 $params))
 >;
 
 /********** ================== **********/
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index f1f01de..d6fb960 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -151,15 +151,14 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
 //  Register classes used as source and destination
 //===----------------------------------------------------------------------===//
 
-// Special register classes for predicates and the M0 register
+// Special register classes for predicates.
 def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
 def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
 def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
-def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
 
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
-  (add SGPR_32, M0Reg, VCC_LO)
+  (add SGPR_32, M0, VCC_LO)
 >;
 
 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
-- 
1.8.1.5