[llvm] r356735 - [AMDGPU] Added v5i32 and v5f32 register classes

Tim Renouf via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 22 03:11:21 PDT 2019


Author: tpr
Date: Fri Mar 22 03:11:21 2019
New Revision: 356735

URL: http://llvm.org/viewvc/llvm-project?rev=356735&view=rev
Log:
[AMDGPU] Added v5i32 and v5f32 register classes

They are not used by anything yet, but a subsequent commit will start
using them for image ops that return 5 dwords.

Differential Revision: https://reviews.llvm.org/D58903

Change-Id: I63e1904081e39a6d66e4eb96d51df25ad399d271

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
    llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td Fri Mar 22 03:11:21 2019
@@ -110,11 +110,12 @@ def CC_AMDGPU_Func : CallingConv<[
     VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
     VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
     VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
-  CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
+  CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
   CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
   CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
   CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
   CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
+  CCIfType<[v5i32, v5f32], CCAssignToStack<20, 4>>,
   CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
   CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
 ]>;

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Fri Mar 22 03:11:21 2019
@@ -544,6 +544,8 @@ static unsigned selectSGPRVectorRegClass
     return AMDGPU::SGPR_96RegClassID;
   case 4:
     return AMDGPU::SReg_128RegClassID;
+  case 5:
+    return AMDGPU::SGPR_160RegClassID;
   case 8:
     return AMDGPU::SReg_256RegClassID;
   case 16:

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Mar 22 03:11:21 2019
@@ -156,6 +156,9 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);
+
   setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
 
@@ -244,6 +247,9 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::STORE, MVT::v5f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);
+
   setOperationAction(ISD::STORE, MVT::v8f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
 
@@ -335,6 +341,8 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
@@ -343,6 +351,8 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
 
@@ -402,7 +412,7 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
 
   static const MVT::SimpleValueType VectorIntTypes[] = {
-    MVT::v2i32, MVT::v3i32, MVT::v4i32
+    MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32
   };
 
   for (MVT VT : VectorIntTypes) {
@@ -444,7 +454,7 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
   }
 
   static const MVT::SimpleValueType FloatVectorTypes[] = {
-     MVT::v2f32, MVT::v3f32, MVT::v4f32
+     MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32
   };
 
   for (MVT VT : FloatVectorTypes) {
@@ -492,6 +502,9 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
   setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
   AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
+  AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);
+
   // There are no libcalls of any kind.
   for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
     setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Fri Mar 22 03:11:21 2019
@@ -357,6 +357,9 @@ void AMDGPUInstPrinter::printRegOperand(
   } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo)) {
     O << 'v';
     NumRegs = 3;
+  } else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo)) {
+    O << 'v';
+    NumRegs = 5;
   } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
     O << 'v';
     NumRegs = 8;

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Mar 22 03:11:21 2019
@@ -132,6 +132,9 @@ SITargetLowering::SITargetLowering(const
   addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
   addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
 
+  addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
+  addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
+
   addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
   addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
 
@@ -155,6 +158,7 @@ SITargetLowering::SITargetLowering(const
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v5i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
   setOperationAction(ISD::LOAD, MVT::i1, Custom);
@@ -163,6 +167,7 @@ SITargetLowering::SITargetLowering(const
   setOperationAction(ISD::STORE, MVT::v2i32, Custom);
   setOperationAction(ISD::STORE, MVT::v3i32, Custom);
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+  setOperationAction(ISD::STORE, MVT::v5i32, Custom);
   setOperationAction(ISD::STORE, MVT::v8i32, Custom);
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
   setOperationAction(ISD::STORE, MVT::i1, Custom);
@@ -336,6 +341,12 @@ SITargetLowering::SITargetLowering(const
   setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i32, Expand);
   setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4f32, Expand);
 
+  // Deal with vec5 vector operations when widened to vec8.
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5i32, Expand);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5f32, Expand);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i32, Expand);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8f32, Expand);
+
   // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
   // and output demarshalling
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
@@ -9688,6 +9699,9 @@ SITargetLowering::getRegForInlineAsmCons
       case 128:
         RC = &AMDGPU::SReg_128RegClass;
         break;
+      case 160:
+        RC = &AMDGPU::SReg_160RegClass;
+        break;
       case 256:
         RC = &AMDGPU::SReg_256RegClass;
         break;
@@ -9713,6 +9727,9 @@ SITargetLowering::getRegForInlineAsmCons
       case 128:
         RC = &AMDGPU::VReg_128RegClass;
         break;
+      case 160:
+        RC = &AMDGPU::VReg_160RegClass;
+        break;
       case 256:
         RC = &AMDGPU::VReg_256RegClass;
         break;

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Fri Mar 22 03:11:21 2019
@@ -845,6 +845,8 @@ static unsigned getSGPRSpillSaveOpcode(u
     return AMDGPU::SI_SPILL_S96_SAVE;
   case 16:
     return AMDGPU::SI_SPILL_S128_SAVE;
+  case 20:
+    return AMDGPU::SI_SPILL_S160_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_S256_SAVE;
   case 64:
@@ -864,6 +866,8 @@ static unsigned getVGPRSpillSaveOpcode(u
     return AMDGPU::SI_SPILL_V96_SAVE;
   case 16:
     return AMDGPU::SI_SPILL_V128_SAVE;
+  case 20:
+    return AMDGPU::SI_SPILL_V160_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_V256_SAVE;
   case 64:
@@ -949,6 +953,8 @@ static unsigned getSGPRSpillRestoreOpcod
     return AMDGPU::SI_SPILL_S96_RESTORE;
   case 16:
     return AMDGPU::SI_SPILL_S128_RESTORE;
+  case 20:
+    return AMDGPU::SI_SPILL_S160_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_S256_RESTORE;
   case 64:
@@ -968,6 +974,8 @@ static unsigned getVGPRSpillRestoreOpcod
     return AMDGPU::SI_SPILL_V96_RESTORE;
   case 16:
     return AMDGPU::SI_SPILL_V128_RESTORE;
+  case 20:
+    return AMDGPU::SI_SPILL_V160_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_V256_RESTORE;
   case 64:

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Fri Mar 22 03:11:21 2019
@@ -481,6 +481,7 @@ defm SI_SPILL_S32  : SI_SPILL_SGPR <SReg
 defm SI_SPILL_S64  : SI_SPILL_SGPR <SReg_64>;
 defm SI_SPILL_S96  : SI_SPILL_SGPR <SReg_96>;
 defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 
@@ -514,6 +515,7 @@ defm SI_SPILL_V32  : SI_SPILL_VGPR <VGPR
 defm SI_SPILL_V64  : SI_SPILL_VGPR <VReg_64>;
 defm SI_SPILL_V96  : SI_SPILL_VGPR <VReg_96>;
 defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
+defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
 defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
 defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
 
@@ -771,6 +773,22 @@ foreach Index = 0-3 in {
   >;
 }
 
+foreach Index = 0-4 in {
+  def Extract_Element_v5i32_#Index : Extract_Element <
+    i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v5i32_#Index : Insert_Element <
+    i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v5f32_#Index : Extract_Element <
+    f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v5f32_#Index : Insert_Element <
+    f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
+
 foreach Index = 0-7 in {
   def Extract_Element_v8i32_#Index : Extract_Element <
     i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -900,6 +918,10 @@ def : BitConvert <v4i32, v2f64, VReg_128
 def : BitConvert <v2i64, v2f64, VReg_128>;
 def : BitConvert <v2f64, v2i64, VReg_128>;
 
+// 160-bit bitcast
+def : BitConvert <v5i32, v5f32, SGPR_160>;
+def : BitConvert <v5f32, v5i32, SGPR_160>;
+
 // 256-bit bitcast
 def : BitConvert <v8i32, v8f32, SReg_256>;
 def : BitConvert <v8f32, v8i32, SReg_256>;

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Fri Mar 22 03:11:21 2019
@@ -410,6 +410,11 @@ static unsigned getNumSubRegsForSpillOp(
   case AMDGPU::SI_SPILL_V256_SAVE:
   case AMDGPU::SI_SPILL_V256_RESTORE:
     return 8;
+  case AMDGPU::SI_SPILL_S160_SAVE:
+  case AMDGPU::SI_SPILL_S160_RESTORE:
+  case AMDGPU::SI_SPILL_V160_SAVE:
+  case AMDGPU::SI_SPILL_V160_RESTORE:
+    return 5;
   case AMDGPU::SI_SPILL_S128_SAVE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
   case AMDGPU::SI_SPILL_V128_SAVE:
@@ -979,6 +984,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPR
   switch (MI->getOpcode()) {
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S128_SAVE:
   case AMDGPU::SI_SPILL_S96_SAVE:
   case AMDGPU::SI_SPILL_S64_SAVE:
@@ -986,6 +992,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPR
     return spillSGPR(MI, FI, RS, true);
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
   case AMDGPU::SI_SPILL_S96_RESTORE:
   case AMDGPU::SI_SPILL_S64_RESTORE:
@@ -1015,6 +1022,7 @@ void SIRegisterInfo::eliminateFrameIndex
     // SGPR register spill
     case AMDGPU::SI_SPILL_S512_SAVE:
     case AMDGPU::SI_SPILL_S256_SAVE:
+    case AMDGPU::SI_SPILL_S160_SAVE:
     case AMDGPU::SI_SPILL_S128_SAVE:
     case AMDGPU::SI_SPILL_S96_SAVE:
     case AMDGPU::SI_SPILL_S64_SAVE:
@@ -1026,6 +1034,7 @@ void SIRegisterInfo::eliminateFrameIndex
     // SGPR register restore
     case AMDGPU::SI_SPILL_S512_RESTORE:
     case AMDGPU::SI_SPILL_S256_RESTORE:
+    case AMDGPU::SI_SPILL_S160_RESTORE:
     case AMDGPU::SI_SPILL_S128_RESTORE:
     case AMDGPU::SI_SPILL_S96_RESTORE:
     case AMDGPU::SI_SPILL_S64_RESTORE:
@@ -1037,6 +1046,7 @@ void SIRegisterInfo::eliminateFrameIndex
     // VGPR register spill
     case AMDGPU::SI_SPILL_V512_SAVE:
     case AMDGPU::SI_SPILL_V256_SAVE:
+    case AMDGPU::SI_SPILL_V160_SAVE:
     case AMDGPU::SI_SPILL_V128_SAVE:
     case AMDGPU::SI_SPILL_V96_SAVE:
     case AMDGPU::SI_SPILL_V64_SAVE:
@@ -1059,6 +1069,7 @@ void SIRegisterInfo::eliminateFrameIndex
     case AMDGPU::SI_SPILL_V64_RESTORE:
     case AMDGPU::SI_SPILL_V96_RESTORE:
     case AMDGPU::SI_SPILL_V128_RESTORE:
+    case AMDGPU::SI_SPILL_V160_RESTORE:
     case AMDGPU::SI_SPILL_V256_RESTORE:
     case AMDGPU::SI_SPILL_V512_RESTORE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
@@ -1251,6 +1262,8 @@ const TargetRegisterClass *SIRegisterInf
     &AMDGPU::SReg_96RegClass,
     &AMDGPU::VReg_128RegClass,
     &AMDGPU::SReg_128RegClass,
+    &AMDGPU::VReg_160RegClass,
+    &AMDGPU::SReg_160RegClass,
     &AMDGPU::VReg_256RegClass,
     &AMDGPU::SReg_256RegClass,
     &AMDGPU::VReg_512RegClass,
@@ -1283,6 +1296,8 @@ bool SIRegisterInfo::hasVGPRs(const Targ
     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
   case 128:
     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
+  case 160:
+    return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
   case 256:
     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
   case 512:
@@ -1303,6 +1318,8 @@ const TargetRegisterClass *SIRegisterInf
     return &AMDGPU::VReg_96RegClass;
   case 128:
     return &AMDGPU::VReg_128RegClass;
+  case 160:
+    return &AMDGPU::VReg_160RegClass;
   case 256:
     return &AMDGPU::VReg_256RegClass;
   case 512:
@@ -1323,6 +1340,8 @@ const TargetRegisterClass *SIRegisterInf
     return &AMDGPU::SReg_96RegClass;
   case 128:
     return &AMDGPU::SReg_128RegClass;
+  case 160:
+    return &AMDGPU::SReg_160RegClass;
   case 256:
     return &AMDGPU::SReg_256RegClass;
   case 512:
@@ -1349,6 +1368,8 @@ const TargetRegisterClass *SIRegisterInf
       return &AMDGPU::SReg_96RegClass;
     case 4:
       return &AMDGPU::SReg_128RegClass;
+    case 5:
+      return &AMDGPU::SReg_160RegClass;
     case 8:
       return &AMDGPU::SReg_256RegClass;
     case 16: /* fall-through */
@@ -1365,6 +1386,8 @@ const TargetRegisterClass *SIRegisterInf
       return &AMDGPU::VReg_96RegClass;
     case 4:
       return &AMDGPU::VReg_128RegClass;
+    case 5:
+      return &AMDGPU::VReg_160RegClass;
     case 8:
       return &AMDGPU::VReg_256RegClass;
     case 16: /* fall-through */
@@ -1427,6 +1450,10 @@ ArrayRef<int16_t> SIRegisterInfo::getReg
       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
     };
 
+    static const int16_t Sub0_4[] = {
+      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
+    };
+
     static const int16_t Sub0_3[] = {
       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
     };
@@ -1448,6 +1475,8 @@ ArrayRef<int16_t> SIRegisterInfo::getReg
       return makeArrayRef(Sub0_2);
     case 128:
       return makeArrayRef(Sub0_3);
+    case 160:
+      return makeArrayRef(Sub0_4);
     case 256:
       return makeArrayRef(Sub0_7);
     case 512:
@@ -1618,6 +1647,9 @@ SIRegisterInfo::getConstrainedRegClassFo
   case 128:
     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
                                                   &AMDGPU::SReg_128RegClass;
+  case 160:
+    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
+                                                  &AMDGPU::SReg_160RegClass;
   case 256:
     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
                                                   &AMDGPU::SReg_256RegClass;

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td Fri Mar 22 03:11:21 2019
@@ -14,6 +14,7 @@ class getSubRegs<int size> {
   list<SubRegIndex> ret2 = [sub0, sub1];
   list<SubRegIndex> ret3 = [sub0, sub1, sub2];
   list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
+  list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4];
   list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
   list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
                              sub4, sub5, sub6, sub7,
@@ -23,7 +24,8 @@ class getSubRegs<int size> {
   list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
                               !if(!eq(size, 3), ret3,
                                   !if(!eq(size, 4), ret4,
-                                      !if(!eq(size, 8), ret8, ret16))));
+                                      !if(!eq(size, 5), ret5,
+                                          !if(!eq(size, 8), ret8, ret16)))));
 }
 
 //===----------------------------------------------------------------------===//
@@ -190,6 +192,14 @@ def SGPR_128Regs : RegisterTuples<getSub
                                (add (decimate (shl SGPR_32, 2), 4)),
                                (add (decimate (shl SGPR_32, 3), 4))]>;
 
+// SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs.
+def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret,
+                            [(add (decimate SGPR_32, 4)),
+                             (add (decimate (shl SGPR_32, 1), 4)),
+                             (add (decimate (shl SGPR_32, 2), 4)),
+                             (add (decimate (shl SGPR_32, 3), 4)),
+                             (add (decimate (shl SGPR_32, 4), 4))]>;
+
 // SGPR 256-bit registers
 def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
                               [(add (decimate SGPR_32, 4)),
@@ -372,6 +382,14 @@ def VGPR_128 : RegisterTuples<getSubRegs
                                (add (shl VGPR_32, 2)),
                                (add (shl VGPR_32, 3))]>;
 
+// VGPR 160-bit registers
+def VGPR_160 : RegisterTuples<getSubRegs<5>.ret,
+                             [(add (trunc VGPR_32, 252)),
+                              (add (shl VGPR_32, 1)),
+                              (add (shl VGPR_32, 2)),
+                              (add (shl VGPR_32, 3)),
+                              (add (shl VGPR_32, 4))]>;
+
 // VGPR 256-bit registers
 def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
                               [(add (trunc VGPR_32, 249)),
@@ -505,6 +523,18 @@ def SReg_128 : RegisterClass<"AMDGPU", [
 
 } // End CopyCost = 2
 
+// There are no 5-component scalar instructions, but this is needed
+// for symmetry with VGPRs.
+def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+  (add SGPR_160Regs)> {
+  let AllocationPriority = 12;
+}
+
+def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+  (add SGPR_160)> {
+  let AllocationPriority = 12;
+}
+
 def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
   let AllocationPriority = 13;
 }
@@ -565,6 +595,14 @@ def VReg_128 : RegisterClass<"AMDGPU", [
   let AllocationPriority = 4;
 }
 
+def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add VGPR_160)> {
+  let Size = 160;
+
+  // Requires 5 v_mov_b32 to copy
+  let CopyCost = 5;
+  let AllocationPriority = 5;
+}
+
 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
   let Size = 256;
   let CopyCost = 8;

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Fri Mar 22 03:11:21 2019
@@ -822,6 +822,10 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::SReg_128RegClassID:
   case AMDGPU::VReg_128RegClassID:
     return 128;
+  case AMDGPU::SGPR_160RegClassID:
+  case AMDGPU::SReg_160RegClassID:
+  case AMDGPU::VReg_160RegClassID:
+    return 160;
   case AMDGPU::SReg_256RegClassID:
   case AMDGPU::VReg_256RegClassID:
     return 256;

Modified: llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll Fri Mar 22 03:11:21 2019
@@ -281,6 +281,23 @@ bb:
   ret void
 }
 
+; GCN-LABEL: {{^}}s_select_v5f32:
+; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
+
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+
+; GCN: buffer_store_dwordx
+define amdgpu_kernel void @s_select_v5f32(<5 x float> addrspace(1)* %out, <5 x float> %a, <5 x float> %b, i32 %c) #0 {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, <5 x float> %a, <5 x float> %b
+  store <5 x float> %select, <5 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
 ; GCN-LABEL: {{^}}select_v8f32:
 ; GCN: v_cndmask_b32_e32
 ; GCN: v_cndmask_b32_e32

Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll Fri Mar 22 03:11:21 2019
@@ -139,6 +139,66 @@ ret:
   ret void
 }
 
+; ALL-LABEL: {{^}}spill_sgpr_x5:
+; SMEM: s_add_u32 m0, s3, 0x100{{$}}
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_cbranch_scc1
+
+; SMEM: s_add_u32 m0, s3, 0x100{{$}}
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_dcache_wb
+; SMEM: s_endpgm
+
+; FIXME: Should only need 4 bytes
+; SMEM: ScratchSize: 24
+
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
+; VGPR: s_cbranch_scc1
+
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
+
+
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: s_cbranch_scc1
+
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+define amdgpu_kernel void @spill_sgpr_x5(i32 addrspace(1)* %out, i32 %in) #0 {
+  %wide.sgpr = call <5 x i32>  asm sideeffect "; def $0", "=s" () #0
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+
+bb0:
+  call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr) #0
+  br label %ret
+
+ret:
+  ret void
+}
+
 ; ALL-LABEL: {{^}}spill_sgpr_x8:
 
 ; SMEM: s_add_u32 m0, s3, 0x100{{$}}




More information about the llvm-commits mailing list