[llvm] r266205 - [AMDGPU][llvm-mc] Support of Trap Handler registers (TTMP0..11 and TBA/TMA)

Wed Apr 13 09:18:42 PDT 2016

Author: artem.tamazov
Date: Wed Apr 13 11:18:41 2016
New Revision: 266205

URL: http://llvm.org/viewvc/llvm-project?rev=266205&view=rev
Log:
[AMDGPU][llvm-mc] Support of Trap Handler registers (TTMP0..11 and TBA/TMA)

Tests added along with implemented feature.
Note that there is a small leftover of an unnecessary MI scheduling issue
(more info in the review). CodeGen/AMDGPU/salu-to-valu.ll updated to fix
the false regression.

TODO: Support for TTMP quads, comma-separated syntax in "[]" and more.

Differential Revision: http://reviews.llvm.org/D17825

Added:
    llvm/trunk/test/MC/AMDGPU/trap.s
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
    llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=266205&r1=266204&r2=266205&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Apr 13 11:18:41 2016
@@ -384,17 +384,32 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
           FlatUsed = true;
           continue;
 
+        case AMDGPU::TBA:
+        case AMDGPU::TBA_LO:
+        case AMDGPU::TBA_HI:
+        case AMDGPU::TMA:
+        case AMDGPU::TMA_LO:
+        case AMDGPU::TMA_HI:
+          llvm_unreachable("Trap Handler registers should not be used");
+          continue;
+
         default:
           break;
         }
 
         if (AMDGPU::SReg_32RegClass.contains(reg)) {
+          if (AMDGPU::TTMP_32RegClass.contains(reg)) {
+            llvm_unreachable("Trap Handler registers should not be used");
+          }
           isSGPR = true;
           width = 1;
         } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
           isSGPR = false;
           width = 1;
         } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+          if (AMDGPU::TTMP_64RegClass.contains(reg)) {
+            llvm_unreachable("Trap Handler registers should not be used");
+          }
           isSGPR = true;
           width = 2;
         } else if (AMDGPU::VReg_64RegClass.contains(reg)) {

Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=266205&r1=266204&r2=266205&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Apr 13 11:18:41 2016
@@ -574,8 +574,10 @@ struct OptionalOperand {
 
 }
 
-static int getRegClass(bool IsVgpr, unsigned RegWidth) {
-  if (IsVgpr) {
+enum  RegisterKind { IS_VGPR, IS_SGPR, IS_TTMP };
+
+static int getRegClass(RegisterKind Is, unsigned RegWidth) {
+  if (Is == IS_VGPR) {
     switch (RegWidth) {
       default: return -1;
       case 1: return AMDGPU::VGPR_32RegClassID;
@@ -585,16 +587,23 @@ static int getRegClass(bool IsVgpr, unsi
       case 8: return AMDGPU::VReg_256RegClassID;
       case 16: return AMDGPU::VReg_512RegClassID;
     }
+  } else if (Is == IS_TTMP) {
+    switch (RegWidth) {
+      default: return -1;
+      case 1: return AMDGPU::TTMP_32RegClassID;
+      case 2: return AMDGPU::TTMP_64RegClassID;
+    }
+  } else if (Is == IS_SGPR) {
+    switch (RegWidth) {
+      default: return -1;
+      case 1: return AMDGPU::SGPR_32RegClassID;
+      case 2: return AMDGPU::SGPR_64RegClassID;
+      case 4: return AMDGPU::SReg_128RegClassID;
+      case 8: return AMDGPU::SReg_256RegClassID;
+      case 16: return AMDGPU::SReg_512RegClassID;
+    }
   }
-
-  switch (RegWidth) {
-    default: return -1;
-    case 1: return AMDGPU::SGPR_32RegClassID;
-    case 2: return AMDGPU::SGPR_64RegClassID;
-    case 4: return AMDGPU::SReg_128RegClassID;
-    case 8: return AMDGPU::SReg_256RegClassID;
-    case 16: return AMDGPU::SReg_512RegClassID;
-  }
+  return -1;
 }
 
 static unsigned getRegForName(StringRef RegName) {
@@ -611,6 +620,10 @@ static unsigned getRegForName(StringRef
     .Case("vcc_hi", AMDGPU::VCC_HI)
     .Case("exec_lo", AMDGPU::EXEC_LO)
     .Case("exec_hi", AMDGPU::EXEC_HI)
+    .Case("tma_lo", AMDGPU::TMA_LO)
+    .Case("tma_hi", AMDGPU::TMA_HI)
+    .Case("tba_lo", AMDGPU::TBA_LO)
+    .Case("tba_hi", AMDGPU::TBA_HI)
     .Default(0);
 }
 
@@ -641,21 +654,21 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsm
                                     TRI, &getSTI(), false);
   }
 
-  // Match vgprs and sgprs
-  if (RegName[0] != 's' && RegName[0] != 'v')
+  // Match vgprs, sgprs and ttmps
+  if (RegName[0] != 's' && RegName[0] != 'v' && !RegName.startswith("ttmp"))
     return nullptr;
 
-  bool IsVgpr = RegName[0] == 'v';
+  const RegisterKind Is = RegName[0] == 'v' ? IS_VGPR : RegName[0] == 's' ? IS_SGPR : IS_TTMP;
   unsigned RegWidth;
   unsigned RegIndexInClass;
-  if (RegName.size() > 1) {
-    // We have a 32-bit register
+  if (RegName.size() > (Is == IS_TTMP ? strlen("ttmp") : 1) ) {
+    // We have a single 32-bit register. Syntax: vXX
     RegWidth = 1;
-    if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
+    if (RegName.substr(Is == IS_TTMP ? strlen("ttmp") : 1).getAsInteger(10, RegIndexInClass))
       return nullptr;
     Parser.Lex();
   } else {
-    // We have a register greater than 32-bits.
+    // We have a register greater than 32-bits (a range of single registers). Syntax: v[XX:YY]
 
     int64_t RegLo, RegHi;
     Parser.Lex();
@@ -678,11 +691,11 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsm
 
     Parser.Lex();
     RegWidth = (RegHi - RegLo) + 1;
-    if (IsVgpr) {
+    if (Is == IS_VGPR) {
       // VGPR registers aren't aligned.
       RegIndexInClass = RegLo;
     } else {
-      // SGPR registers are aligned.  Max alignment is 4 dwords.
+      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
       unsigned Size = std::min(RegWidth, 4u);
       if (RegLo % Size != 0)
         return nullptr;
@@ -691,7 +704,7 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsm
     }
   }
 
-  int RCID = getRegClass(IsVgpr, RegWidth);
+  int RCID = getRegClass(Is, RegWidth);
   if (RCID == -1)
     return nullptr;
 

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=266205&r1=266204&r2=266205&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Wed Apr 13 11:18:41 2016
@@ -18,6 +18,8 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include <string>
+
 using namespace llvm;
 
 void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
@@ -189,6 +191,18 @@ void AMDGPUInstPrinter::printRegOperand(
   case AMDGPU::VCC_HI:
     O << "vcc_hi";
     return;
+  case AMDGPU::TBA_LO:
+    O << "tba_lo";
+    return;
+  case AMDGPU::TBA_HI:
+    O << "tba_hi";
+    return;
+  case AMDGPU::TMA_LO:
+    O << "tma_lo";
+    return;
+  case AMDGPU::TMA_HI:
+    O << "tma_hi";
+    return;
   case AMDGPU::EXEC_LO:
     O << "exec_lo";
     return;
@@ -205,41 +219,44 @@ void AMDGPUInstPrinter::printRegOperand(
     break;
   }
 
-  char Type;
+  std::string Type;
   unsigned NumRegs;
 
   if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(reg)) {
-    Type = 'v';
+    Type = "v";
     NumRegs = 1;
   } else  if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(reg)) {
-    Type = 's';
+    Type = "s";
     NumRegs = 1;
   } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(reg)) {
-    Type = 'v';
+    Type = "v";
+    NumRegs = 2;
+  } else  if (MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(reg)) {
+    Type = "s";
     NumRegs = 2;
-  } else  if (MRI.getRegClass(AMDGPU::SReg_64RegClassID).contains(reg)) {
-    Type = 's';
+  } else  if (MRI.getRegClass(AMDGPU::TTMP_64RegClassID).contains(reg)) {
+    Type = "ttmp";
     NumRegs = 2;
   } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
-    Type = 'v';
+    Type = "v";
     NumRegs = 4;
   } else  if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
-    Type = 's';
+    Type = "s";
     NumRegs = 4;
   } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
-    Type = 'v';
+    Type = "v";
     NumRegs = 3;
   } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(reg)) {
-    Type = 'v';
+    Type = "v";
     NumRegs = 8;
   } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(reg)) {
-    Type = 's';
+    Type = "s";
     NumRegs = 8;
   } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(reg)) {
-    Type = 'v';
+    Type = "v";
     NumRegs = 16;
   } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(reg)) {
-    Type = 's';
+    Type = "s";
     NumRegs = 16;
   } else {
     O << getRegisterName(reg);
@@ -249,6 +266,8 @@ void AMDGPUInstPrinter::printRegOperand(
   // The low 8 bits of the encoding value is the register index, for both VGPRs
   // and SGPRs.
   unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
+  if (Type == "ttmp")
+    RegIdx -= 112; // Trap temps start at offset 112. TODO: Get this from tablegen.
   if (NumRegs == 1) {
     O << Type << RegIdx;
     return;

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=266205&r1=266204&r2=266205&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Wed Apr 13 11:18:41 2016
@@ -114,6 +114,16 @@ BitVector SIRegisterInfo::getReservedReg
   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
 
+  // Reserve Trap Handler registers - support is not implemented in Codegen.
+  reserveRegisterTuples(Reserved, AMDGPU::TBA);
+  reserveRegisterTuples(Reserved, AMDGPU::TMA);
+  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
+  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
+  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
+  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
+  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
+  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
+
   // Reserve the last 2 registers so we will always have at least 2 more that
   // will physically contain VCC.
   reserveRegisterTuples(Reserved, AMDGPU::SGPR102_SGPR103);
@@ -640,7 +650,21 @@ unsigned SIRegisterInfo::getPhysRegSubRe
       switch(Channel) {
         case 0: return AMDGPU::VCC_LO;
         case 1: return AMDGPU::VCC_HI;
-        default: llvm_unreachable("Invalid SubIdx for VCC");
+        default: llvm_unreachable("Invalid SubIdx for VCC"); break;
+      }
+
+    case AMDGPU::TBA:
+      switch(Channel) {
+        case 0: return AMDGPU::TBA_LO;
+        case 1: return AMDGPU::TBA_HI;
+        default: llvm_unreachable("Invalid SubIdx for TBA"); break;
+      }
+
+    case AMDGPU::TMA:
+      switch(Channel) {
+        case 0: return AMDGPU::TMA_LO;
+        case 1: return AMDGPU::TMA_HI;
+        default: llvm_unreachable("Invalid SubIdx for TMA"); break;
       }
 
   case AMDGPU::FLAT_SCR:

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td?rev=266205&r1=266204&r2=266205&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td Wed Apr 13 11:18:41 2016
@@ -44,6 +44,40 @@ def EXEC : RegisterWithSubRegs<"EXEC", [
 def SCC : SIReg<"scc", 253>;
 def M0 : SIReg <"m0", 124>;
 
+// Trap handler registers
+def TBA_LO : SIReg<"tba_lo", 108>;
+def TBA_HI : SIReg<"tba_hi", 109>;
+
+def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
+          DwarfRegAlias<TBA_LO> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices = [sub0, sub1];
+  let HWEncoding = 108;
+}
+
+def TMA_LO : SIReg<"tma_lo", 110>;
+def TMA_HI : SIReg<"tma_hi", 111>;
+
+def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
+          DwarfRegAlias<TMA_LO> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices = [sub0, sub1];
+  let HWEncoding = 110;
+}
+
+def TTMP0 : SIReg <"ttmp0", 112>;
+def TTMP1 : SIReg <"ttmp1", 113>;
+def TTMP2 : SIReg <"ttmp2", 114>;
+def TTMP3 : SIReg <"ttmp3", 115>;
+def TTMP4 : SIReg <"ttmp4", 116>;
+def TTMP5 : SIReg <"ttmp5", 117>;
+def TTMP6 : SIReg <"ttmp6", 118>;
+def TTMP7 : SIReg <"ttmp7", 119>;
+def TTMP8 : SIReg <"ttmp8", 120>;
+def TTMP9 : SIReg <"ttmp9", 121>;
+def TTMP10 : SIReg <"ttmp10", 122>;
+def TTMP11 : SIReg <"ttmp11", 123>;
+
 multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
   def _ci : SIReg<n, ci_e>;
   def _vi : SIReg<n, vi_e>;
@@ -135,6 +169,24 @@ def SGPR_512 : RegisterTuples<[sub0, sub
                                (add (decimate (shl SGPR_32, 14), 4)),
                                (add (decimate (shl SGPR_32, 15), 4))]>;
 
+// Trap handler TMP 32-bit registers
+def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+                            (add (sequence "TTMP%u", 0, 11))> {
+  let isAllocatable = 0;
+}
+
+// Trap handler TMP 64-bit registers
+def TTMP_64Regs : RegisterTuples<[sub0, sub1],
+                             [(add (decimate TTMP_32, 2)),
+                              (add (decimate (shl TTMP_32, 1), 2))]>;
+
+// Trap handler TMP 128-bit registers
+def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
+                              [(add (decimate TTMP_32, 4)),
+                               (add (decimate (shl TTMP_32, 1), 4)),
+                               (add (decimate (shl TTMP_32, 2), 4)),
+                               (add (decimate (shl TTMP_32, 3), 4))]>;
+
 // VGPR 32-bit registers
 def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
                             (add (sequence "VGPR%u", 0, 255))>;
@@ -199,13 +251,18 @@ class RegImmMatcher<string name> : AsmOp
 
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
-  (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
+  (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
+   TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
 >;
 
 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;
 
+def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
+  let isAllocatable = 0;
+}
+
 def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
-  (add SGPR_64, VCC, EXEC, FLAT_SCR)
+  (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64)
 >;
 
 def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {

Modified: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll?rev=266205&r1=266204&r2=266205&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll Wed Apr 13 11:18:41 2016
@@ -201,22 +201,14 @@ entry:
 
 ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
 
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
-; GCN-NOHSA-NOT: v_add
-; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
+; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
+; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
+; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
+; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
+; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
+; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
+; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
+; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
 
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}

Added: llvm/trunk/test/MC/AMDGPU/trap.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/trap.s?rev=266205&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/trap.s (added)
+++ llvm/trunk/test/MC/AMDGPU/trap.s Wed Apr 13 11:18:41 2016
@@ -0,0 +1,99 @@
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=VI
+
+//===----------------------------------------------------------------------===//
+// Trap Handler related - 32 bit registers
+//===----------------------------------------------------------------------===//
+
+s_add_u32     ttmp0, ttmp0, 4
+// SICI: s_add_u32 ttmp0, ttmp0, 4       ; encoding: [0x70,0x84,0x70,0x80]
+// VI:   s_add_u32 ttmp0, ttmp0, 4       ; encoding: [0x70,0x84,0x70,0x80]
+
+s_add_u32     ttmp4, 8, ttmp4
+// SICI: s_add_u32 ttmp4, 8, ttmp4       ; encoding: [0x88,0x74,0x74,0x80]
+// VI:   s_add_u32 ttmp4, 8, ttmp4       ; encoding: [0x88,0x74,0x74,0x80]
+
+s_add_u32     ttmp4, ttmp4, 0x00000100
+// SICI: s_add_u32 ttmp4, ttmp4, 0x100   ; encoding: [0x74,0xff,0x74,0x80,0x00,0x01,0x00,0x00]
+// VI:   s_add_u32 ttmp4, ttmp4, 0x100   ; encoding: [0x74,0xff,0x74,0x80,0x00,0x01,0x00,0x00]
+
+s_add_u32     ttmp4, ttmp4, 4
+// SICI: s_add_u32 ttmp4, ttmp4, 4       ; encoding: [0x74,0x84,0x74,0x80]
+// VI:   s_add_u32 ttmp4, ttmp4, 4       ; encoding: [0x74,0x84,0x74,0x80]
+
+s_add_u32     ttmp4, ttmp8, ttmp4
+// SICI: s_add_u32 ttmp4, ttmp8, ttmp4   ; encoding: [0x78,0x74,0x74,0x80]
+// VI:   s_add_u32 ttmp4, ttmp8, ttmp4   ; encoding: [0x78,0x74,0x74,0x80]
+
+s_and_b32     ttmp10, ttmp8, 0x00000080
+// SICI: s_and_b32 ttmp10, ttmp8, 0x80   ; encoding: [0x78,0xff,0x7a,0x87,0x80,0x00,0x00,0x00]
+// VI:   s_and_b32 ttmp10, ttmp8, 0x80   ; encoding: [0x78,0xff,0x7a,0x86,0x80,0x00,0x00,0x00]
+
+s_and_b32     ttmp9, tma_hi, 0x0000ffff
+// SICI: s_and_b32 ttmp9, tma_hi, 0xffff ; encoding: [0x6f,0xff,0x79,0x87,0xff,0xff,0x00,0x00]
+// VI:   s_and_b32 ttmp9, tma_hi, 0xffff ; encoding: [0x6f,0xff,0x79,0x86,0xff,0xff,0x00,0x00]
+
+s_and_b32     ttmp9, ttmp9, 0x000001ff
+// SICI: s_and_b32 ttmp9, ttmp9, 0x1ff   ; encoding: [0x79,0xff,0x79,0x87,0xff,0x01,0x00,0x00]
+// VI:   s_and_b32 ttmp9, ttmp9, 0x1ff   ; encoding: [0x79,0xff,0x79,0x86,0xff,0x01,0x00,0x00]
+
+s_and_b32     ttmp9, tma_lo, 0xffff0000
+// SICI: s_and_b32 ttmp9, tma_lo, 0xffff0000 ; encoding: [0x6e,0xff,0x79,0x87,0x00,0x00,0xff,0xff]
+// VI:   s_and_b32 ttmp9, tma_lo, 0xffff0000 ; encoding: [0x6e,0xff,0x79,0x86,0x00,0x00,0xff,0xff]
+
+s_and_b32     ttmp9, ttmp9, ttmp8
+// SICI: s_and_b32 ttmp9, ttmp9, ttmp8   ; encoding: [0x79,0x78,0x79,0x87]
+// VI:   s_and_b32 ttmp9, ttmp9, ttmp8   ; encoding: [0x79,0x78,0x79,0x86]
+
+s_and_b32   ttmp8, ttmp1, 0x01000000
+// SICI: s_and_b32 ttmp8, ttmp1, 0x1000000 ; encoding: [0x71,0xff,0x78,0x87,0x00,0x00,0x00,0x01]
+// VI:   s_and_b32 ttmp8, ttmp1, 0x1000000 ; encoding: [0x71,0xff,0x78,0x86,0x00,0x00,0x00,0x01]
+
+s_cmp_eq_i32  ttmp8, 0
+// SICI: s_cmp_eq_i32 ttmp8, 0           ; encoding: [0x78,0x80,0x00,0xbf]
+// VI:   s_cmp_eq_i32 ttmp8, 0           ; encoding: [0x78,0x80,0x00,0xbf]
+
+s_cmp_eq_i32  ttmp8, 0x000000fe
+// SICI: s_cmp_eq_i32 ttmp8, 0xfe        ; encoding: [0x78,0xff,0x00,0xbf,0xfe,0x00,0x00,0x00]
+// VI:   s_cmp_eq_i32 ttmp8, 0xfe        ; encoding: [0x78,0xff,0x00,0xbf,0xfe,0x00,0x00,0x00]
+
+s_lshr_b32    ttmp8, ttmp8, 12
+// SICI: s_lshr_b32 ttmp8, ttmp8, 12     ; encoding: [0x78,0x8c,0x78,0x90]
+// VI:   s_lshr_b32 ttmp8, ttmp8, 12     ; encoding: [0x78,0x8c,0x78,0x8f]
+
+s_mov_b32     m0, ttmp8
+// SICI: s_mov_b32 m0, ttmp8             ; encoding: [0x78,0x03,0xfc,0xbe]
+// VI:   s_mov_b32 m0, ttmp8             ; encoding: [0x78,0x00,0xfc,0xbe]
+
+s_mov_b32     ttmp10, 0
+// SICI: s_mov_b32 ttmp10, 0             ; encoding: [0x80,0x03,0xfa,0xbe]
+// VI:   s_mov_b32 ttmp10, 0             ; encoding: [0x80,0x00,0xfa,0xbe]
+
+s_mov_b32     ttmp11, 0x01024fac
+// SICI: s_mov_b32 ttmp11, 0x1024fac     ; encoding: [0xff,0x03,0xfb,0xbe,0xac,0x4f,0x02,0x01]
+// VI:   s_mov_b32 ttmp11, 0x1024fac     ; encoding: [0xff,0x00,0xfb,0xbe,0xac,0x4f,0x02,0x01]
+
+s_mov_b32     ttmp8, m0
+// SICI: s_mov_b32 ttmp8, m0             ; encoding: [0x7c,0x03,0xf8,0xbe]
+// VI:   s_mov_b32 ttmp8, m0             ; encoding: [0x7c,0x00,0xf8,0xbe]
+
+s_mov_b32     ttmp8, tma_lo
+// SICI: s_mov_b32 ttmp8, tma_lo         ; encoding: [0x6e,0x03,0xf8,0xbe]
+// VI:   s_mov_b32 ttmp8, tma_lo         ; encoding: [0x6e,0x00,0xf8,0xbe]
+
+s_mul_i32     ttmp8, 0x00000324, ttmp8
+// SICI: s_mul_i32 ttmp8, 0x324, ttmp8   ; encoding: [0xff,0x78,0x78,0x93,0x24,0x03,0x00,0x00]
+// VI:   s_mul_i32 ttmp8, 0x324, ttmp8   ; encoding: [0xff,0x78,0x78,0x92,0x24,0x03,0x00,0x00]
+
+s_or_b32      ttmp9, ttmp9, 0x00280000
+// SICI: s_or_b32 ttmp9, ttmp9, 0x280000 ; encoding: [0x79,0xff,0x79,0x88,0x00,0x00,0x28,0x00]
+// VI:   s_or_b32 ttmp9, ttmp9, 0x280000 ; encoding: [0x79,0xff,0x79,0x87,0x00,0x00,0x28,0x00]
+
+//===----------------------------------------------------------------------===//
+// Trap Handler related - Pairs and quadruples of registers
+//===----------------------------------------------------------------------===//
+
+s_mov_b64     ttmp[4:5], exec
+// SICI: s_mov_b64 ttmp[4:5], exec       ; encoding: [0x7e,0x04,0xf4,0xbe]
+// VI:   s_mov_b64 ttmp[4:5], exec       ; encoding: [0x7e,0x01,0xf4,0xbe]