[llvm] r176346 - R600/SI: handle all registers in copyPhysReg v2
Christian König
christian.koenig at amd.com
Fri Mar 1 01:46:27 PST 2013
Author: ckoenig
Date: Fri Mar 1 03:46:27 2013
New Revision: 176346
URL: http://llvm.org/viewvc/llvm-project?rev=176346&view=rev
Log:
R600/SI: handle all registers in copyPhysReg v2
v2: based on Michel's patch, but now allows copying of all register sizes.
Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
Signed-off-by: Christian König <christian.koenig at amd.com>
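
For readers skimming the diff below: the patch replaces the per-width special
cases with one table-driven expansion that walks a null-terminated list of
sub-register indices and emits one 32-bit move per entry. Here is a minimal,
self-contained sketch of that pattern; the names (kSub0, expandCopy) and the
printf output are hypothetical stand-ins for MachineInstrBuilder and the
generated AMDGPU::sub* indices, not the actual implementation:

#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the generated sub-register indices
// (AMDGPU::sub0 and friends in the real code).
enum SubRegIdx : int16_t { kNone = 0, kSub0 = 1, kSub1, kSub2, kSub3 };

// Null-terminated index lists, mirroring Sub0_1/Sub0_3 in the patch.
static const int16_t Sub0_1[] = { kSub0, kSub1, kNone };
static const int16_t Sub0_3[] = { kSub0, kSub1, kSub2, kSub3, kNone };

// Emit one 32-bit move per sub-register. Every move except the last
// also carries an implicit def of the full destination register, so
// the wide register counts as defined while the sequence is in flight.
static void expandCopy(const char *Opcode, const int16_t *SubIndices) {
  while (int16_t SubIdx = *SubIndices++) {
    bool HasMore = *SubIndices != kNone;
    std::printf("%s dst.sub%d <- src.sub%d%s\n", Opcode, SubIdx - 1,
                SubIdx - 1, HasMore ? " (implicit-def dst)" : "");
  }
}

int main() {
  expandCopy("V_MOV_B32_e32", Sub0_1); // 64-bit VGPR copy: two moves
  expandCopy("S_MOV_B32", Sub0_3);     // 128-bit SGPR copy: four moves
  return 0;
}

Building this with any C++11 compiler prints the per-sub-register moves each
wide copy expands into.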
Modified:
llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.cpp?rev=176346&r1=176345&r2=176346&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.cpp Fri Mar 1 03:46:27 2013
@@ -33,36 +33,108 @@ const SIRegisterInfo &SIInstrInfo::getRe
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+
// If we are trying to copy to or from SCC, there is a bug somewhere else in
// the backend. While it may be theoretically possible to do this, it should
// never be necessary.
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
- if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
- AMDGPU::SReg_64RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub0))
- .addReg(RI.getSubReg(SrcReg, AMDGPU::sub0), getKillRegState(KillSrc))
- .addReg(DestReg, RegState::Define | RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub1))
- .addReg(RI.getSubReg(SrcReg, AMDGPU::sub1), getKillRegState(KillSrc));
+ const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
+ };
+
+ const int16_t Sub0_7[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
+ };
+
+ const int16_t Sub0_3[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
+ };
+
+ const int16_t Sub0_1[] = {
+ AMDGPU::sub0, AMDGPU::sub1, 0
+ };
+
+ unsigned Opcode;
+ const int16_t *SubIndices;
+
+ if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
} else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_15;
+
} else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
- AMDGPU::SReg_32RegClass.contains(SrcReg));
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_1;
+
+ } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_15;
+
} else {
- assert(AMDGPU::SReg_32RegClass.contains(DestReg));
- assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ llvm_unreachable("Can't copy register!");
+ }
+
+ while (unsigned SubIdx = *SubIndices++) {
+ MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
+ get(Opcode), RI.getSubReg(DestReg, SubIdx));
+
+ Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
+
+ if (*SubIndices)
+ Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
}
}
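
A note on the implicit operand in the final loop: every move except the last
additionally defines DestReg as an implicit operand (the if (*SubIndices)
check looks ahead to the list terminator). My reading, since the log does not
spell it out: each 32-bit move writes only one sub-register, and the implicit
def of the full register keeps the not-yet-written lanes from looking
undefined to liveness analysis in between; the last move can drop it because
by then every lane of the destination has been written.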