[llvm] r286118 - AMDGPU: Refactor copyPhysReg
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 7 08:39:22 PST 2016
Author: arsenm
Date: Mon Nov 7 10:39:22 2016
New Revision: 286118
URL: http://llvm.org/viewvc/llvm-project?rev=286118&view=rev
Log:
AMDGPU: Refactor copyPhysReg
Separate the subregister splitting logic to re-use later.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=286118&r1=286117&r2=286118&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Mon Nov 7 10:39:22 2016
@@ -349,51 +349,17 @@ void SIInstrInfo::copyPhysReg(MachineBas
MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc) const {
+ const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
- static const int16_t Sub0_15[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
- };
-
- static const int16_t Sub0_15_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
- AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
- AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
- };
-
- static const int16_t Sub0_7[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- };
-
- static const int16_t Sub0_7_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
- };
-
- static const int16_t Sub0_3[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- };
-
- static const int16_t Sub0_3_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- };
-
- static const int16_t Sub0_2[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
- };
-
- static const int16_t Sub0_1[] = {
- AMDGPU::sub0, AMDGPU::sub1,
- };
-
- unsigned Opcode;
- ArrayRef<int16_t> SubIndices;
+ if (RC == &AMDGPU::VGPR_32RegClass) {
+ assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
+ if (RC == &AMDGPU::SReg_32RegClass) {
if (SrcReg == AMDGPU::SCC) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
.addImm(-1)
@@ -405,8 +371,9 @@ void SIInstrInfo::copyPhysReg(MachineBas
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
+ }
- } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ if (RC == &AMDGPU::SReg_64RegClass) {
if (DestReg == AMDGPU::VCC) {
if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
@@ -426,68 +393,29 @@ void SIInstrInfo::copyPhysReg(MachineBas
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
+ }
- } else if (DestReg == AMDGPU::SCC) {
+ if (DestReg == AMDGPU::SCC) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0);
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
return;
- } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B64;
- SubIndices = Sub0_3_64;
-
- } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B64;
- SubIndices = Sub0_7_64;
-
- } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B64;
- SubIndices = Sub0_15_64;
-
- } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
- assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
- AMDGPU::SReg_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
-
- } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
- AMDGPU::SReg_64RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_1;
-
- } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_2;
-
- } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
- AMDGPU::SReg_128RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_3;
-
- } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
- AMDGPU::SReg_256RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_7;
-
- } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
- AMDGPU::SReg_512RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_15;
+ }
- } else {
- llvm_unreachable("Can't copy register!");
+ unsigned EltSize = 4;
+ unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ if (RI.isSGPRClass(RC)) {
+ if (RC->getSize() > 4) {
+ Opcode = AMDGPU::S_MOV_B64;
+ EltSize = 8;
+ } else {
+ Opcode = AMDGPU::S_MOV_B32;
+ EltSize = 4;
+ }
}
+ ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=286118&r1=286117&r2=286118&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Mon Nov 7 10:39:22 2016
@@ -1168,3 +1168,106 @@ unsigned SIRegisterInfo::getMaxNumVGPRs(
return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST);
}
+/// Returns the sub-register indices that split a register of class RC into EltSize-byte pieces (4, 8 or 16); empty when RC is exactly one piece wide.
+ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
+ unsigned EltSize) const {
+ if (EltSize == 4) { // Split into 32-bit (4-byte) pieces.
+ static const int16_t Sub0_15[] = { // Tables are static, so the returned ArrayRef refers to storage that outlives the call.
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ };
+
+ static const int16_t Sub0_7[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ };
+
+ static const int16_t Sub0_3[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ };
+
+ static const int16_t Sub0_2[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
+ };
+
+ static const int16_t Sub0_1[] = {
+ AMDGPU::sub0, AMDGPU::sub1,
+ };
+
+ switch (AMDGPU::getRegBitWidth(*RC->MC)) { // Choose the table from the register class's width in bits.
+ case 32:
+ return {}; // Already a single 32-bit piece; nothing to split.
+ case 64:
+ return Sub0_1;
+ case 96:
+ return Sub0_2;
+ case 128:
+ return Sub0_3;
+ case 256:
+ return Sub0_7;
+ case 512:
+ return Sub0_15;
+ default:
+ llvm_unreachable("unhandled register size"); // No table exists for any other width.
+ }
+ }
+
+ if (EltSize == 8) { // Split into 64-bit (8-byte) pieces, using the paired subN_subN+1 indices.
+ static const int16_t Sub0_15_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
+ AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
+ AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
+ };
+
+ static const int16_t Sub0_7_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
+ };
+
+
+ static const int16_t Sub0_3_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
+ };
+
+ switch (AMDGPU::getRegBitWidth(*RC->MC)) {
+ case 64:
+ return {}; // Already a single 64-bit piece.
+ case 128:
+ return Sub0_3_64;
+ case 256:
+ return Sub0_7_64;
+ case 512:
+ return Sub0_15_64;
+ default:
+ llvm_unreachable("unhandled register size"); // 32- and 96-bit classes have no 64-bit split here.
+ }
+ }
+
+ assert(EltSize == 16 && "unhandled register spill split size"); // Only 4, 8 and 16 are handled; what follows is the 16-byte case.
+
+ static const int16_t Sub0_15_128[] = {
+ AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::sub4_sub5_sub6_sub7,
+ AMDGPU::sub8_sub9_sub10_sub11,
+ AMDGPU::sub12_sub13_sub14_sub15
+ };
+
+ static const int16_t Sub0_7_128[] = {
+ AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::sub4_sub5_sub6_sub7
+ };
+
+ switch (AMDGPU::getRegBitWidth(*RC->MC)) {
+ case 128:
+ return {}; // Already a single 128-bit piece.
+ case 256:
+ return Sub0_7_128;
+ case 512:
+ return Sub0_15_128;
+ default:
+ llvm_unreachable("unhandled register size"); // Classes narrower than 128 bits (and 96-bit) have no 128-bit split here.
+ }
+}
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=286118&r1=286117&r2=286118&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Mon Nov 7 10:39:22 2016
@@ -245,6 +245,9 @@ public:
/// unit requirement.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
+ ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
+ unsigned EltSize) const; // Sub-register indices that split a register of class RC into EltSize-byte pieces (4, 8 or 16).
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, const MachineOperand *SrcDst,
More information about the llvm-commits mailing list