[llvm] r356735 - [AMDGPU] Added v5i32 and v5f32 register classes
Tim Renouf via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 22 03:11:21 PDT 2019
Author: tpr
Date: Fri Mar 22 03:11:21 2019
New Revision: 356735
URL: http://llvm.org/viewvc/llvm-project?rev=356735&view=rev
Log:
[AMDGPU] Added v5i32 and v5f32 register classes
They are not used by anything yet, but a subsequent commit will start
using them for image ops that return 5 dwords.
Differential Revision: https://reviews.llvm.org/D58903
Change-Id: I63e1904081e39a6d66e4eb96d51df25ad399d271
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td Fri Mar 22 03:11:21 2019
@@ -110,11 +110,12 @@ def CC_AMDGPU_Func : CallingConv<[
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
- CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
+ CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
+ CCIfType<[v5i32, v5f32], CCAssignToStack<20, 4>>,
CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
]>;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Fri Mar 22 03:11:21 2019
@@ -544,6 +544,8 @@ static unsigned selectSGPRVectorRegClass
return AMDGPU::SGPR_96RegClassID;
case 4:
return AMDGPU::SReg_128RegClassID;
+ case 5:
+ return AMDGPU::SGPR_160RegClassID;
case 8:
return AMDGPU::SReg_256RegClassID;
case 16:
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Mar 22 03:11:21 2019
@@ -156,6 +156,9 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);
+
setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
@@ -244,6 +247,9 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::STORE, MVT::v5f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);
+
setOperationAction(ISD::STORE, MVT::v8f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
@@ -335,6 +341,8 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
@@ -343,6 +351,8 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
@@ -402,7 +412,7 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
static const MVT::SimpleValueType VectorIntTypes[] = {
- MVT::v2i32, MVT::v3i32, MVT::v4i32
+ MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32
};
for (MVT VT : VectorIntTypes) {
@@ -444,7 +454,7 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
}
static const MVT::SimpleValueType FloatVectorTypes[] = {
- MVT::v2f32, MVT::v3f32, MVT::v4f32
+ MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32
};
for (MVT VT : FloatVectorTypes) {
@@ -492,6 +502,9 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);
+
// There are no libcalls of any kind.
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Fri Mar 22 03:11:21 2019
@@ -357,6 +357,9 @@ void AMDGPUInstPrinter::printRegOperand(
} else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo)) {
O << 'v';
NumRegs = 3;
+ } else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo)) {
+ O << 'v';
+ NumRegs = 5;
} else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
O << 'v';
NumRegs = 8;
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Mar 22 03:11:21 2019
@@ -132,6 +132,9 @@ SITargetLowering::SITargetLowering(const
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
+ addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
+
addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
@@ -155,6 +158,7 @@ SITargetLowering::SITargetLowering(const
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v5i32, Custom);
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
setOperationAction(ISD::LOAD, MVT::i1, Custom);
@@ -163,6 +167,7 @@ SITargetLowering::SITargetLowering(const
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v3i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v5i32, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
@@ -336,6 +341,12 @@ SITargetLowering::SITargetLowering(const
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i32, Expand);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4f32, Expand);
+ // Deal with vec5 vector operations when widened to vec8.
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5i32, Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5f32, Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i32, Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8f32, Expand);
+
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
// and output demarshalling
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
@@ -9688,6 +9699,9 @@ SITargetLowering::getRegForInlineAsmCons
case 128:
RC = &AMDGPU::SReg_128RegClass;
break;
+ case 160:
+ RC = &AMDGPU::SReg_160RegClass;
+ break;
case 256:
RC = &AMDGPU::SReg_256RegClass;
break;
@@ -9713,6 +9727,9 @@ SITargetLowering::getRegForInlineAsmCons
case 128:
RC = &AMDGPU::VReg_128RegClass;
break;
+ case 160:
+ RC = &AMDGPU::VReg_160RegClass;
+ break;
case 256:
RC = &AMDGPU::VReg_256RegClass;
break;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Fri Mar 22 03:11:21 2019
@@ -845,6 +845,8 @@ static unsigned getSGPRSpillSaveOpcode(u
return AMDGPU::SI_SPILL_S96_SAVE;
case 16:
return AMDGPU::SI_SPILL_S128_SAVE;
+ case 20:
+ return AMDGPU::SI_SPILL_S160_SAVE;
case 32:
return AMDGPU::SI_SPILL_S256_SAVE;
case 64:
@@ -864,6 +866,8 @@ static unsigned getVGPRSpillSaveOpcode(u
return AMDGPU::SI_SPILL_V96_SAVE;
case 16:
return AMDGPU::SI_SPILL_V128_SAVE;
+ case 20:
+ return AMDGPU::SI_SPILL_V160_SAVE;
case 32:
return AMDGPU::SI_SPILL_V256_SAVE;
case 64:
@@ -949,6 +953,8 @@ static unsigned getSGPRSpillRestoreOpcod
return AMDGPU::SI_SPILL_S96_RESTORE;
case 16:
return AMDGPU::SI_SPILL_S128_RESTORE;
+ case 20:
+ return AMDGPU::SI_SPILL_S160_RESTORE;
case 32:
return AMDGPU::SI_SPILL_S256_RESTORE;
case 64:
@@ -968,6 +974,8 @@ static unsigned getVGPRSpillRestoreOpcod
return AMDGPU::SI_SPILL_V96_RESTORE;
case 16:
return AMDGPU::SI_SPILL_V128_RESTORE;
+ case 20:
+ return AMDGPU::SI_SPILL_V160_RESTORE;
case 32:
return AMDGPU::SI_SPILL_V256_RESTORE;
case 64:
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Fri Mar 22 03:11:21 2019
@@ -481,6 +481,7 @@ defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S96 : SI_SPILL_SGPR <SReg_96>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
@@ -514,6 +515,7 @@ defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR
defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
+defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
@@ -771,6 +773,22 @@ foreach Index = 0-3 in {
>;
}
+foreach Index = 0-4 in {
+ def Extract_Element_v5i32_#Index : Extract_Element <
+ i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v5i32_#Index : Insert_Element <
+ i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v5f32_#Index : Extract_Element <
+ f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v5f32_#Index : Insert_Element <
+ f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
foreach Index = 0-7 in {
def Extract_Element_v8i32_#Index : Extract_Element <
i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -900,6 +918,10 @@ def : BitConvert <v4i32, v2f64, VReg_128
def : BitConvert <v2i64, v2f64, VReg_128>;
def : BitConvert <v2f64, v2i64, VReg_128>;
+// 160-bit bitcast
+def : BitConvert <v5i32, v5f32, SGPR_160>;
+def : BitConvert <v5f32, v5i32, SGPR_160>;
+
// 256-bit bitcast
def : BitConvert <v8i32, v8f32, SReg_256>;
def : BitConvert <v8f32, v8i32, SReg_256>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Fri Mar 22 03:11:21 2019
@@ -410,6 +410,11 @@ static unsigned getNumSubRegsForSpillOp(
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V256_RESTORE:
return 8;
+ case AMDGPU::SI_SPILL_S160_SAVE:
+ case AMDGPU::SI_SPILL_S160_RESTORE:
+ case AMDGPU::SI_SPILL_V160_SAVE:
+ case AMDGPU::SI_SPILL_V160_RESTORE:
+ return 5;
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_V128_SAVE:
@@ -979,6 +984,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPR
switch (MI->getOpcode()) {
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S96_SAVE:
case AMDGPU::SI_SPILL_S64_SAVE:
@@ -986,6 +992,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPR
return spillSGPR(MI, FI, RS, true);
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_S96_RESTORE:
case AMDGPU::SI_SPILL_S64_RESTORE:
@@ -1015,6 +1022,7 @@ void SIRegisterInfo::eliminateFrameIndex
// SGPR register spill
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S96_SAVE:
case AMDGPU::SI_SPILL_S64_SAVE:
@@ -1026,6 +1034,7 @@ void SIRegisterInfo::eliminateFrameIndex
// SGPR register restore
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_S96_RESTORE:
case AMDGPU::SI_SPILL_S64_RESTORE:
@@ -1037,6 +1046,7 @@ void SIRegisterInfo::eliminateFrameIndex
// VGPR register spill
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V160_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V64_SAVE:
@@ -1059,6 +1069,7 @@ void SIRegisterInfo::eliminateFrameIndex
case AMDGPU::SI_SPILL_V64_RESTORE:
case AMDGPU::SI_SPILL_V96_RESTORE:
case AMDGPU::SI_SPILL_V128_RESTORE:
+ case AMDGPU::SI_SPILL_V160_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
@@ -1251,6 +1262,8 @@ const TargetRegisterClass *SIRegisterInf
&AMDGPU::SReg_96RegClass,
&AMDGPU::VReg_128RegClass,
&AMDGPU::SReg_128RegClass,
+ &AMDGPU::VReg_160RegClass,
+ &AMDGPU::SReg_160RegClass,
&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,
&AMDGPU::VReg_512RegClass,
@@ -1283,6 +1296,8 @@ bool SIRegisterInfo::hasVGPRs(const Targ
return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
case 128:
return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
+ case 160:
+ return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
case 256:
return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
case 512:
@@ -1303,6 +1318,8 @@ const TargetRegisterClass *SIRegisterInf
return &AMDGPU::VReg_96RegClass;
case 128:
return &AMDGPU::VReg_128RegClass;
+ case 160:
+ return &AMDGPU::VReg_160RegClass;
case 256:
return &AMDGPU::VReg_256RegClass;
case 512:
@@ -1323,6 +1340,8 @@ const TargetRegisterClass *SIRegisterInf
return &AMDGPU::SReg_96RegClass;
case 128:
return &AMDGPU::SReg_128RegClass;
+ case 160:
+ return &AMDGPU::SReg_160RegClass;
case 256:
return &AMDGPU::SReg_256RegClass;
case 512:
@@ -1349,6 +1368,8 @@ const TargetRegisterClass *SIRegisterInf
return &AMDGPU::SReg_96RegClass;
case 4:
return &AMDGPU::SReg_128RegClass;
+ case 5:
+ return &AMDGPU::SReg_160RegClass;
case 8:
return &AMDGPU::SReg_256RegClass;
case 16: /* fall-through */
@@ -1365,6 +1386,8 @@ const TargetRegisterClass *SIRegisterInf
return &AMDGPU::VReg_96RegClass;
case 4:
return &AMDGPU::VReg_128RegClass;
+ case 5:
+ return &AMDGPU::VReg_160RegClass;
case 8:
return &AMDGPU::VReg_256RegClass;
case 16: /* fall-through */
@@ -1427,6 +1450,10 @@ ArrayRef<int16_t> SIRegisterInfo::getReg
AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
};
+ static const int16_t Sub0_4[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
+ };
+
static const int16_t Sub0_3[] = {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
};
@@ -1448,6 +1475,8 @@ ArrayRef<int16_t> SIRegisterInfo::getReg
return makeArrayRef(Sub0_2);
case 128:
return makeArrayRef(Sub0_3);
+ case 160:
+ return makeArrayRef(Sub0_4);
case 256:
return makeArrayRef(Sub0_7);
case 512:
@@ -1618,6 +1647,9 @@ SIRegisterInfo::getConstrainedRegClassFo
case 128:
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
&AMDGPU::SReg_128RegClass;
+ case 160:
+ return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
+ &AMDGPU::SReg_160RegClass;
case 256:
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
&AMDGPU::SReg_256RegClass;
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td Fri Mar 22 03:11:21 2019
@@ -14,6 +14,7 @@ class getSubRegs<int size> {
list<SubRegIndex> ret2 = [sub0, sub1];
list<SubRegIndex> ret3 = [sub0, sub1, sub2];
list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
+ list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4];
list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
sub4, sub5, sub6, sub7,
@@ -23,7 +24,8 @@ class getSubRegs<int size> {
list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
!if(!eq(size, 3), ret3,
!if(!eq(size, 4), ret4,
- !if(!eq(size, 8), ret8, ret16))));
+ !if(!eq(size, 5), ret5,
+ !if(!eq(size, 8), ret8, ret16)))));
}
//===----------------------------------------------------------------------===//
@@ -190,6 +192,14 @@ def SGPR_128Regs : RegisterTuples<getSub
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4))]>;
+// SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs.
+def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret,
+ [(add (decimate SGPR_32, 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4))]>;
+
// SGPR 256-bit registers
def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
[(add (decimate SGPR_32, 4)),
@@ -372,6 +382,14 @@ def VGPR_128 : RegisterTuples<getSubRegs
(add (shl VGPR_32, 2)),
(add (shl VGPR_32, 3))]>;
+// VGPR 160-bit registers
+def VGPR_160 : RegisterTuples<getSubRegs<5>.ret,
+ [(add (trunc VGPR_32, 252)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4))]>;
+
// VGPR 256-bit registers
def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
[(add (trunc VGPR_32, 249)),
@@ -505,6 +523,18 @@ def SReg_128 : RegisterClass<"AMDGPU", [
} // End CopyCost = 2
+// There are no 5-component scalar instructions, but this is needed
+// for symmetry with VGPRs.
+def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+ (add SGPR_160Regs)> {
+ let AllocationPriority = 12;
+}
+
+def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+ (add SGPR_160)> {
+ let AllocationPriority = 12;
+}
+
def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
let AllocationPriority = 13;
}
@@ -565,6 +595,14 @@ def VReg_128 : RegisterClass<"AMDGPU", [
let AllocationPriority = 4;
}
+def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add VGPR_160)> {
+ let Size = 160;
+
+ // Requires 5 v_mov_b32 to copy
+ let CopyCost = 5;
+ let AllocationPriority = 5;
+}
+
def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
let Size = 256;
let CopyCost = 8;
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Fri Mar 22 03:11:21 2019
@@ -822,6 +822,10 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::SReg_128RegClassID:
case AMDGPU::VReg_128RegClassID:
return 128;
+ case AMDGPU::SGPR_160RegClassID:
+ case AMDGPU::SReg_160RegClassID:
+ case AMDGPU::VReg_160RegClassID:
+ return 160;
case AMDGPU::SReg_256RegClassID:
case AMDGPU::VReg_256RegClassID:
return 256;
Modified: llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll Fri Mar 22 03:11:21 2019
@@ -281,6 +281,23 @@ bb:
ret void
}
+; GCN-LABEL: {{^}}s_select_v5f32:
+; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
+
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+
+; GCN: buffer_store_dwordx
+define amdgpu_kernel void @s_select_v5f32(<5 x float> addrspace(1)* %out, <5 x float> %a, <5 x float> %b, i32 %c) #0 {
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, <5 x float> %a, <5 x float> %b
+ store <5 x float> %select, <5 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
; GCN-LABEL: {{^}}select_v8f32:
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll?rev=356735&r1=356734&r2=356735&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll Fri Mar 22 03:11:21 2019
@@ -139,6 +139,66 @@ ret:
ret void
}
+; ALL-LABEL: {{^}}spill_sgpr_x5:
+; SMEM: s_add_u32 m0, s3, 0x100{{$}}
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_cbranch_scc1
+
+; SMEM: s_add_u32 m0, s3, 0x100{{$}}
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_dcache_wb
+; SMEM: s_endpgm
+
+; FIXME: Should only need 4 bytes
+; SMEM: ScratchSize: 24
+
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
+; VGPR: s_cbranch_scc1
+
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
+
+
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: s_cbranch_scc1
+
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+define amdgpu_kernel void @spill_sgpr_x5(i32 addrspace(1)* %out, i32 %in) #0 {
+ %wide.sgpr = call <5 x i32> asm sideeffect "; def $0", "=s" () #0
+ %cmp = icmp eq i32 %in, 0
+ br i1 %cmp, label %bb0, label %ret
+
+bb0:
+ call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr) #0
+ br label %ret
+
+ret:
+ ret void
+}
+
; ALL-LABEL: {{^}}spill_sgpr_x8:
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
More information about the llvm-commits
mailing list