[PATCH] R600/SI: Handle MUBUF instructions in moveToVALU
Matt Arsenault
Matthew.Arsenault at amd.com
Tue Mar 18 14:50:05 PDT 2014
On 03/18/2014 02:38 PM, Tom Stellard wrote:
> Hi,
>
> The attached patches teach SIInstrInfo::moveToVALU() how to handle MUBUF
> instructions. These instructions are already executed by the VALU, but
> we sometimes need to legalize the srsrc operand if its defining instruction
> has been moved from the SALU to the VALU.
>
> -Tom
>
> 0001-R600-SI-Use-SGPR_-32-64-reg-clases-when-lowering-SI_.patch
>
>
> From 43de42a6982230dcc7aa729da60506de94016998 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 18 Mar 2014 20:59:34 -0400
> Subject: [PATCH 1/3] R600/SI: Use SGPR_(32|64) reg clases when lowering
> SI_ADDR64_RSRC
>
> The SReg_(32|64) register classes contain special registers in addition
> to the numbered SGPRs. This can lead to machine verifier errors when
> these register classes are used as sub-registers for SReg_128, since
> SReg_128 only uses the numbered SGPRs.
>
> Replacing SReg_(32|64) with SGPR_(32|64) fixes this problem, since
> the SGPR_(32|64) register classes contain only numbered SGPRs.
>
> Test cases for this are coming in a later commit.
> ---
> lib/Target/R600/SIISelLowering.cpp | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 8cf1b82..52e5a16 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -398,10 +398,10 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
> static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
> MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
> unsigned SuperReg = MI->getOperand(0).getReg();
> - unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> - unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> - unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
> - unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
> + unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
> + unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
> + unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> + unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
> .addOperand(MI->getOperand(1));
> BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
> -- 1.8.1.5
>
> 0002-R600-SI-Handle-S_MOV_B64-in-SIInstrInfo-moveToVALU.patch
>
>
> From f6694e5ba5a50b135950a29c66fea4664210c750 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 18 Mar 2014 21:54:07 -0400
> Subject: [PATCH 2/3] R600/SI: Handle S_MOV_B64 in SIInstrInfo::moveToVALU()
>
> ---
> lib/Target/R600/SIInstrInfo.cpp | 57 +++++++++++++++++++++++++++++++++++++--
> test/CodeGen/R600/salu-to-valu.ll | 42 +++++++++++++++++++++++++++++
> 2 files changed, 97 insertions(+), 2 deletions(-)
> create mode 100644 test/CodeGen/R600/salu-to-valu.ll
>
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 3ed8dfa..0401f25 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -680,12 +680,65 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
>
> while (!Worklist.empty()) {
> MachineInstr *Inst = Worklist.pop_back_val();
> + MachineBasicBlock *MBB = Inst->getParent();
> + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
> +
> + // Handle some special cases
> + switch(Inst->getOpcode()) {
> + case AMDGPU::S_MOV_B64: {
> + DebugLoc DL = Inst->getDebugLoc();
> +
> + // If the source operand is a register we can replace this with a
> + // copy
> + if (Inst->getOperand(1).isReg()) {
> + MachineInstr *Copy = BuildMI(*MBB, Inst, DL,
> + get(AMDGPU::COPY))
> + .addOperand(Inst->getOperand(0))
> + .addOperand(Inst->getOperand(1));
> + Worklist.push_back(Copy);
> + } else {
> + // Otherwise, we need to split this into two movs, because there is
> + // no 64-bit VALU move instruction.
> + unsigned LoSrc, HiSrc, LoDst, HiDst, Dst;
> + LoSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> + LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> + HiSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> + HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> + Dst = MRI.createVirtualRegister(
> + MRI.getRegClass(Inst->getOperand(0).getReg()));
> +
> + BuildMI(*MBB, Inst, DL, get(AMDGPU::EXTRACT_SUBREG), LoSrc)
> + .addOperand(Inst->getOperand(1))
> + .addImm(AMDGPU::sub0);
> + BuildMI(*MBB, Inst, DL, get(AMDGPU::EXTRACT_SUBREG), HiSrc)
> + .addOperand(Inst->getOperand(1))
> + .addImm(AMDGPU::sub1);
> + MachineInstr *Lo = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
> + LoDst)
> + .addReg(LoSrc);
> + MachineInstr *Hi = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
> + HiDst)
> + .addReg(HiSrc);
> +
> + BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), Dst)
> + .addReg(LoDst)
> + .addImm(AMDGPU::sub0)
> + .addReg(HiDst)
> + .addImm(AMDGPU::sub1);
> +
> + MRI.replaceRegWith(Inst->getOperand(0).getReg(), Dst);
> + Worklist.push_back(Lo);
> + Worklist.push_back(Hi);
> + }
> + Inst->eraseFromParent();
> + continue;
> + }
> + }
> +
> unsigned NewOpcode = getVALUOp(*Inst);
> if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
> continue;
>
> - MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
> -
> // Use the new VALU Opcode.
> const MCInstrDesc &NewDesc = get(NewOpcode);
> Inst->setDesc(NewDesc);
> diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
> new file mode 100644
> index 0000000..c989c9d
> --- /dev/null
> +++ b/test/CodeGen/R600/salu-to-valu.ll
> @@ -0,0 +1,42 @@
> +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
> +
> +; In this test both the pointer and the offset operands to the
> +; BUFFER_LOAD instructions end up being stored in vgprs. This
> +; requires us to add the pointer and offset together, store the
> +; result in the offset operand (vaddr), and then store 0 in an
> +; sgpr register pair and use that for the pointer operand
> +; (low 64-bits of srsrc).
> +
> +; CHECK-LABEL: @mubuf
> +; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
> +; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
> +define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
> +entry:
> + %0 = call i32 @llvm.r600.read.tidig.x() #1
> + %1 = call i32 @llvm.r600.read.tidig.y() #1
> + %2 = sext i32 %0 to i64
> + %3 = sext i32 %1 to i64
> + br label %loop
> +
> +loop:
> + %4 = phi i64 [0, %entry], [%5, %loop]
> + %5 = add i64 %2, %4
> + %6 = getelementptr i8 addrspace(1)* %in, i64 %5
> + %7 = load i8 addrspace(1)* %6, align 1
> + %8 = or i64 %5, 1
> + %9 = getelementptr i8 addrspace(1)* %in, i64 %8
> + %10 = load i8 addrspace(1)* %9, align 1
> + %11 = add i8 %7, %10
> + %12 = sext i8 %11 to i32
> + store i32 %12, i32 addrspace(1)* %out
> + %13 = icmp slt i64 %5, 10
> + br i1 %13, label %loop, label %done
> +
> +done:
> + ret void
> +}
> +
> +declare i32 @llvm.r600.read.tidig.x() #1
> +declare i32 @llvm.r600.read.tidig.y() #1
> +
> +attributes #1 = { nounwind readnone }
> -- 1.8.1.5
>
> 0003-R600-SI-Handle-MUBUF-instructions-in-SIInstrInfo-mov.patch
>
>
> From 3e23edb6e49507bf2bf1da6d86c80ea13f1a0541 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 18 Mar 2014 20:58:27 -0400
> Subject: [PATCH 3/3] R600/SI: Handle MUBUF instructions in
> SIInstrInfo::moveToVALU()
>
> ---
> lib/Target/R600/AMDGPUTargetMachine.cpp | 3 +
> lib/Target/R600/SIISelLowering.cpp | 4 +-
> lib/Target/R600/SIInstrFormats.td | 1 +
> lib/Target/R600/SIInstrInfo.cpp | 135 +++++++++++++++++++++++++++++++-
> lib/Target/R600/SIInstrInfo.h | 10 +++
> test/CodeGen/R600/salu-to-valu.ll | 7 +-
> 6 files changed, 155 insertions(+), 5 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 7f50428..b11fce3 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -165,6 +165,9 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
> addPass(createR600VectorRegMerger(*TM));
> } else {
> addPass(createSIFixSGPRCopiesPass(*TM));
> + // SIFixSGPRCopies can generate a lot of duplicate instructions,
> + // so we need to run MachineCSE afterwards.
> + addPass(&MachineCSEID);
> }
> return false;
> }
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 52e5a16..fd1e3a6 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -25,8 +25,6 @@
> #include "llvm/CodeGen/SelectionDAG.h"
> #include "llvm/IR/Function.h"
>
> -const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
> -
> using namespace llvm;
>
> SITargetLowering::SITargetLowering(TargetMachine &TM) :
> @@ -407,7 +405,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
> BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
> .addImm(0);
> BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
> - .addImm(RSRC_DATA_FORMAT >> 32);
> + .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
> BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
> .addReg(SubRegHiLo)
> .addImm(AMDGPU::sub0)
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 53ebaaf..aa2c22c 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -369,6 +369,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let EXP_CNT = 1;
>
> let neverHasSideEffects = 1;
> + let UseNamedOperandTable = 1;
> }
>
> class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 0401f25..68332ae 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -555,6 +555,31 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
> MO.ChangeToRegister(Reg, false);
> }
>
> +unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
> + MachineRegisterInfo &MRI,
> + MachineOperand &SuperReg,
> + const TargetRegisterClass *SuperRC,
> + unsigned SubIdx,
> + const TargetRegisterClass *SubRC)
> + const {
> + assert(SuperReg.isReg());
> +
> + unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
> + unsigned SubReg = MRI.createVirtualRegister(SubRC);
> +
> + // Just in case the super register is itself a sub-register, copy it to a new
> + // value so we don't need to worry about merging its subreg index with the
> + // SubIdx passed to this function. The register coalescer should be able to
> + // eliminate this extra copy.
> + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::COPY),
> + NewSuperReg)
> + .addOperand(SuperReg);
> +
> + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::COPY), SubReg)
> + .addReg(NewSuperReg, 0, SubIdx);
> + return SubReg;
> +}
Can you use TargetOpcode::COPY instead (and the same for the others)?
It's the same thing, but it makes it clearer that it's not
target-specific. It also makes it easier to find when looking for
examples of using those special instructions.
> +
> void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
> MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
> int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> @@ -672,6 +697,110 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
> MI->getOperand(i).setReg(DstReg);
> }
> }
> +
> + // Legalize MUBUF* instructions
> + // FIXME: If we start using the non-addr64 instructions for compute, we
> + // may need to legalize them here.
> +
> + int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> + AMDGPU::OpName::srsrc);
> + int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> + AMDGPU::OpName::vaddr);
> + if (SRsrcIdx != -1 && VAddrIdx != -1) {
> + const TargetRegisterClass *VAddrRC =
> + RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
> +
> + if(VAddrRC->getSize() == 8 &&
> + MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
> + // We have a MUBUF instruction that uses a 64-bit vaddr register and
> + // srsrc has the incorrect register class. In order to fix this, we
> + // need to extract the pointer from the resource descriptor (srsrc),
> + // add it to the value of vaddr, then store the result in the vaddr
> + // operand. Then, we need to set the pointer field of the resource
> + // descriptor to zero.
> +
> + MachineBasicBlock &MBB = *MI->getParent();
> + MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
> + MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
> + unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
> + unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
> + unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> + unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> + unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> + unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
> +
> + // SRsrcPtrLo = srsrc:sub0
> + SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
> + &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
> +
> + // SRsrcPtrHi = srsrc:sub1
> + SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
> + &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
> +
> + // VAddrLo = vaddr:sub0
> + VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
> + &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
> +
> + // VAddrHi = vaddr:sub1
> + VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
> + &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
> +
> + // NewVaddrLo = SRsrcPtrLo + VAddrLo
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
> + NewVAddrLo)
> + .addReg(SRsrcPtrLo)
> + .addReg(VAddrLo)
> + .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
> +
> + // NewVaddrHi = SRsrcPtrHi + VAddrHi
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
> + NewVAddrHi)
> + .addReg(SRsrcPtrHi)
> + .addReg(VAddrHi)
> + .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
> + .addReg(AMDGPU::VCC, RegState::Implicit);
> +
> + // NewVaddr = {NewVaddrHi, NewVaddrLo}
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
> + NewVAddr)
> + .addReg(NewVAddrLo)
> + .addImm(AMDGPU::sub0)
> + .addReg(NewVAddrHi)
> + .addImm(AMDGPU::sub1);
> +
> + // Zero64 = 0
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
> + Zero64)
> + .addImm(0);
> +
> + // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
> + SRsrcFormatLo)
> + .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
> +
> + // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
> + SRsrcFormatHi)
> + .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
> +
> + // NewSRsrc = {Zero64, SRsrcFormat}
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
> + NewSRsrc)
> + .addReg(Zero64)
> + .addImm(AMDGPU::sub0_sub1)
> + .addReg(SRsrcFormatLo)
> + .addImm(AMDGPU::sub2)
> + .addReg(SRsrcFormatHi)
> + .addImm(AMDGPU::sub3);
> +
> + // Update the instruction to use NewVaddr
> + MI->getOperand(VAddrIdx).setReg(NewVAddr);
> + // Update the instruction to use NewSRsrc
> + MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
> + }
> + }
> }
>
> void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
> @@ -736,8 +865,12 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
> }
>
> unsigned NewOpcode = getVALUOp(*Inst);
> - if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
> + if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
> + // We cannot move this instruction to the VALU, so we should try to
> + // legalize its operands instead.
> + legalizeOperands(Inst);
> continue;
> + }
>
> // Use the new VALU Opcode.
> const MCInstrDesc &NewDesc = get(NewOpcode);
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index bff89d3..96df7c4 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -31,6 +31,13 @@ private:
> unsigned MovRelOp,
> unsigned Dst,
> unsigned Src0) const;
> +
> + unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
> + MachineRegisterInfo &MRI,
> + MachineOperand &SuperReg,
> + const TargetRegisterClass *SuperRC,
> + unsigned SubIdx,
> + const TargetRegisterClass *SubRC) const;
> public:
> explicit SIInstrInfo(AMDGPUTargetMachine &tm);
>
> @@ -148,6 +155,9 @@ namespace AMDGPU {
> int getCommuteRev(uint16_t Opcode);
> int getCommuteOrig(uint16_t Opcode);
>
> + const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
> +
> +
> } // End namespace AMDGPU
>
> } // End namespace llvm
> diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
> index c989c9d..b2aa534 100644
> --- a/test/CodeGen/R600/salu-to-valu.ll
> +++ b/test/CodeGen/R600/salu-to-valu.ll
> @@ -1,4 +1,4 @@
> -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
> +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
>
> ; In this test both the pointer and the offset operands to the
> ; BUFFER_LOAD instructions end up being stored in vgprs. This
> @@ -8,8 +8,13 @@
> ; (low 64-bits of srsrc).
>
> ; CHECK-LABEL: @mubuf
> +
> ; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
> ; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
> +
> +; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
> +; instructions
> +; CHECK-NOT: BUFFER_LOAD_UBYTE v{{[0-9]+}}, v
> define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
> entry:
> %0 = call i32 @llvm.r600.read.tidig.x() #1
I think it would be better if there were two positive checks, one for an
SGPR and one for an immediate, rather than a CHECK-NOT on v
-Matt
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140318/b59681a6/attachment.html>
More information about the llvm-commits
mailing list