[llvm] r204476 - R600/SI: Handle MUBUF instructions in SIInstrInfo::moveToVALU()
Tom Stellard
thomas.stellard at amd.com
Fri Mar 21 08:51:57 PDT 2014
Author: tstellar
Date: Fri Mar 21 10:51:57 2014
New Revision: 204476
URL: http://llvm.org/viewvc/llvm-project?rev=204476&view=rev
Log:
R600/SI: Handle MUBUF instructions in SIInstrInfo::moveToVALU()
Modified:
llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/R600/SIISelLowering.cpp
llvm/trunk/lib/Target/R600/SIInstrFormats.td
llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
llvm/trunk/lib/Target/R600/SIInstrInfo.h
llvm/trunk/test/CodeGen/R600/salu-to-valu.ll
Modified: llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp?rev=204476&r1=204475&r2=204476&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp Fri Mar 21 10:51:57 2014
@@ -165,6 +165,9 @@ bool AMDGPUPassConfig::addPreRegAlloc()
addPass(createR600VectorRegMerger(*TM));
} else {
addPass(createSIFixSGPRCopiesPass(*TM));
+ // SIFixSGPRCopies can generate a lot of duplicate instructions,
+ // so we need to run MachineCSE afterwards.
+ addPass(&MachineCSEID);
}
return false;
}
Modified: llvm/trunk/lib/Target/R600/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.cpp?rev=204476&r1=204475&r2=204476&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.cpp Fri Mar 21 10:51:57 2014
@@ -25,8 +25,6 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"
-const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
-
using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
@@ -407,7 +405,7 @@ MachineBasicBlock * SITargetLowering::Em
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
.addImm(0);
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
- .addImm(RSRC_DATA_FORMAT >> 32);
+ .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
.addReg(SubRegHiLo)
.addImm(AMDGPU::sub0)
Modified: llvm/trunk/lib/Target/R600/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrFormats.td?rev=204476&r1=204475&r2=204476&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstrFormats.td Fri Mar 21 10:51:57 2014
@@ -369,6 +369,7 @@ class MUBUF <bits<7> op, dag outs, dag i
let EXP_CNT = 1;
let neverHasSideEffects = 1;
+ let UseNamedOperandTable = 1;
}
class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.cpp?rev=204476&r1=204475&r2=204476&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.cpp Fri Mar 21 10:51:57 2014
@@ -558,6 +558,32 @@ void SIInstrInfo::legalizeOpWithMove(Mac
MO.ChangeToRegister(Reg, false);
}
+unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ MachineOperand &SuperReg,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC)
+ const {
+ assert(SuperReg.isReg());
+
+ unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+ unsigned SubReg = MRI.createVirtualRegister(SubRC);
+
+ // Just in case the super register is itself a sub-register, copy it to a new
+ // value so we don't need to worry about merging its subreg index with the
+ // SubIdx passed to this function. The register coalescer should be able to
+ // eliminate this extra copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+ NewSuperReg)
+ .addOperand(SuperReg);
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
+ SubReg)
+ .addReg(NewSuperReg, 0, SubIdx);
+ return SubReg;
+}
+
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
@@ -675,6 +701,110 @@ void SIInstrInfo::legalizeOperands(Machi
MI->getOperand(i).setReg(DstReg);
}
}
+
+ // Legalize MUBUF* instructions
+ // FIXME: If we start using the non-addr64 instructions for compute, we
+ // may need to legalize them here.
+
+ int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::srsrc);
+ int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::vaddr);
+ if (SRsrcIdx != -1 && VAddrIdx != -1) {
+ const TargetRegisterClass *VAddrRC =
+ RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
+
+ if(VAddrRC->getSize() == 8 &&
+ MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
+ // We have a MUBUF instruction that uses a 64-bit vaddr register and
+ // srsrc has the incorrect register class. In order to fix this, we
+ // need to extract the pointer from the resource descriptor (srsrc),
+ // add it to the value of vaddr, then store the result in the vaddr
+ // operand. Then, we need to set the pointer field of the resource
+ // descriptor to zero.
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
+ MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
+ unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
+ unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+ // SRsrcPtrLo = srsrc:sub0
+ SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+ // SRsrcPtrHi = srsrc:sub1
+ SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+ // VAddrLo = vaddr:sub0
+ VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
+ &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+ // VAddrHi = vaddr:sub1
+ VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
+ &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+ // NewVaddrLo = SRsrcPtrLo + VAddrLo
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
+ NewVAddrLo)
+ .addReg(SRsrcPtrLo)
+ .addReg(VAddrLo)
+ .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
+
+ // NewVaddrHi = SRsrcPtrHi + VAddrHi
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
+ NewVAddrHi)
+ .addReg(SRsrcPtrHi)
+ .addReg(VAddrHi)
+ .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
+ .addReg(AMDGPU::VCC, RegState::Implicit);
+
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
+
+ // Zero64 = 0
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+ Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatLo)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatHi)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ // Update the instruction to use NewVaddr
+ MI->getOperand(VAddrIdx).setReg(NewVAddr);
+ // Update the instruction to use NewSRsrc
+ MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
+ }
+ }
}
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
@@ -731,8 +861,12 @@ void SIInstrInfo::moveToVALU(MachineInst
}
unsigned NewOpcode = getVALUOp(*Inst);
- if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+ // We cannot move this instruction to the VALU, so we should try to
+ // legalize its operands instead.
+ legalizeOperands(Inst);
continue;
+ }
// Use the new VALU Opcode.
const MCInstrDesc &NewDesc = get(NewOpcode);
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.h?rev=204476&r1=204475&r2=204476&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.h Fri Mar 21 10:51:57 2014
@@ -25,6 +25,13 @@ class SIInstrInfo : public AMDGPUInstrIn
private:
const SIRegisterInfo RI;
+ unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
+ MachineRegisterInfo &MRI,
+ MachineOperand &SuperReg,
+ const TargetRegisterClass *SuperRC,
+ unsigned SubIdx,
+ const TargetRegisterClass *SubRC) const;
+
public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
@@ -142,6 +149,9 @@ namespace AMDGPU {
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
+ const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
+
+
} // End namespace AMDGPU
} // End namespace llvm
Modified: llvm/trunk/test/CodeGen/R600/salu-to-valu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/salu-to-valu.ll?rev=204476&r1=204475&r2=204476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/salu-to-valu.ll (original)
+++ llvm/trunk/test/CodeGen/R600/salu-to-valu.ll Fri Mar 21 10:51:57 2014
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in vgprs. This
@@ -8,8 +8,14 @@
; (low 64-bits of srsrc).
; CHECK-LABEL: @mubuf
+
; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
+
+; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
+; instructions
+; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
+; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #1
More information about the llvm-commits
mailing list