[PATCH] R600/SI: Handle MUBUF instructions in moveToVALU

Matt Arsenault <Matthew.Arsenault at amd.com>
Tue Mar 18 14:50:05 PDT 2014


On 03/18/2014 02:38 PM, Tom Stellard wrote:
> Hi,
>
> The attached patches teach SIInstrInfo::moveToVALU() how to handle MUBUF
> instructions.  These instructions are already executed by the VALU, but
> we sometimes need to legalize the srsrc operand if its defining instruction
> has been moved from the SALU to the VALU.
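
(Roughly the situation being described here, as a sketch rather than the
patch code itself: once the def of the resource register has been rewritten
into a VGPR class, the MUBUF use has to be fixed up.  The check below is
only an illustration -- isSGPRClass() is assumed to be the SIRegisterInfo
helper of this era, and the real patch compares against the vaddr register
class instead.)

    const MachineOperand &SRsrc = MI->getOperand(SRsrcIdx);
    const TargetRegisterClass *RC = MRI.getRegClass(SRsrc.getReg());
    if (!RI.isSGPRClass(RC)) {
      // srsrc is no longer uniform: fold the pointer half of the descriptor
      // into vaddr and rebuild srsrc with a zero pointer, as patch 3 does.
    }
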
>
> -Tom
>
> 0001-R600-SI-Use-SGPR_-32-64-reg-clases-when-lowering-SI_.patch
>
>
>  From 43de42a6982230dcc7aa729da60506de94016998 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 18 Mar 2014 20:59:34 -0400
> Subject: [PATCH 1/3] R600/SI: Use SGPR_(32|64) reg clases when lowering
>   SI_ADDR64_RSRC
>
> The SReg_(32|64) register classes contain special registers in addition
> to the numbered SGPRs.  This can lead to machine verifier errors when
> these register classes are used as sub-registers for SReg_128, since
> SReg_128 only uses the numbered SGPRs.
>
> Replacing SReg_(32|64) with SGPR_(32|64) fixes this problem, since
> the SGPR_(32|64) register classes contain only numbered SGPRs.
>
> Test cases for this are coming in a later commit.
> ---
>   lib/Target/R600/SIISelLowering.cpp | 8 ++++----
>   1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 8cf1b82..52e5a16 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -398,10 +398,10 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
>         static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
>       MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
>       unsigned SuperReg = MI->getOperand(0).getReg();
> -    unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> -    unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> -    unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
> -    unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
> +    unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
> +    unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
> +    unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> +    unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
>       BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
>               .addOperand(MI->getOperand(1));
>       BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
> -- 1.8.1.5
>
> 0002-R600-SI-Handle-S_MOV_B64-in-SIInstrInfo-moveToVALU.patch
>
>
>  From f6694e5ba5a50b135950a29c66fea4664210c750 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 18 Mar 2014 21:54:07 -0400
> Subject: [PATCH 2/3] R600/SI: Handle S_MOV_B64 in SIInstrInfo::moveToVALU()
>
> ---
>   lib/Target/R600/SIInstrInfo.cpp   | 57 +++++++++++++++++++++++++++++++++++++--
>   test/CodeGen/R600/salu-to-valu.ll | 42 +++++++++++++++++++++++++++++
>   2 files changed, 97 insertions(+), 2 deletions(-)
>   create mode 100644 test/CodeGen/R600/salu-to-valu.ll
>
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 3ed8dfa..0401f25 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -680,12 +680,65 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
>   
>     while (!Worklist.empty()) {
>       MachineInstr *Inst = Worklist.pop_back_val();
> +    MachineBasicBlock *MBB = Inst->getParent();
> +    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
> +
> +    // Handle some special cases
> +    switch(Inst->getOpcode()) {
> +      case AMDGPU::S_MOV_B64: {
> +        DebugLoc DL = Inst->getDebugLoc();
> +
> +        // If the source operand is a register we can replace this with a
> +        // copy
> +        if (Inst->getOperand(1).isReg()) {
> +          MachineInstr *Copy = BuildMI(*MBB, Inst, DL,
> +                                       get(AMDGPU::COPY))
> +                                       .addOperand(Inst->getOperand(0))
> +                                       .addOperand(Inst->getOperand(1));
> +          Worklist.push_back(Copy);
> +        } else {
> +          // Otherwise, we need to split this into two movs, because there is
> +          // no 64-bit VALU move instruction.
> +          unsigned LoSrc, HiSrc, LoDst, HiDst, Dst;
> +          LoSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> +          LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> +          HiSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> +          HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> +          Dst = MRI.createVirtualRegister(
> +              MRI.getRegClass(Inst->getOperand(0).getReg()));
> +
> +          BuildMI(*MBB, Inst, DL, get(AMDGPU::EXTRACT_SUBREG), LoSrc)
> +                  .addOperand(Inst->getOperand(1))
> +                  .addImm(AMDGPU::sub0);
> +          BuildMI(*MBB, Inst, DL, get(AMDGPU::EXTRACT_SUBREG), HiSrc)
> +                  .addOperand(Inst->getOperand(1))
> +                  .addImm(AMDGPU::sub1);
> +          MachineInstr *Lo = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
> +                                     LoDst)
> +                                    .addReg(LoSrc);
> +          MachineInstr *Hi = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
> +                                     HiDst)
> +                                     .addReg(HiSrc);
> +
> +          BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), Dst)
> +                  .addReg(LoDst)
> +                  .addImm(AMDGPU::sub0)
> +                  .addReg(HiDst)
> +                  .addImm(AMDGPU::sub1);
> +
> +          MRI.replaceRegWith(Inst->getOperand(0).getReg(), Dst);
> +          Worklist.push_back(Lo);
> +          Worklist.push_back(Hi);
> +        }
> +        Inst->eraseFromParent();
> +        continue;
> +      }
> +    }
> +
>       unsigned NewOpcode = getVALUOp(*Inst);
>       if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
>         continue;
>   
> -    MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
> -
>       // Use the new VALU Opcode.
>       const MCInstrDesc &NewDesc = get(NewOpcode);
>       Inst->setDesc(NewDesc);
> diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
> new file mode 100644
> index 0000000..c989c9d
> --- /dev/null
> +++ b/test/CodeGen/R600/salu-to-valu.ll
> @@ -0,0 +1,42 @@
> +; RUN: llc < %s -march=r600 -mcpu=SI  | FileCheck %s
> +
> +; In this test both the pointer and the offset operands to the
> +; BUFFER_LOAD instructions end up being stored in vgprs.  This
> +; requires us to add the pointer and offset together, store the
> +; result in the offset operand (vaddr), and then store 0 in an
> +; sgpr register pair and use that for the pointer operand
> +; (low 64-bits of srsrc).
> +
> +; CHECK-LABEL: @mubuf
> +; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
> +; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
> +define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
> +entry:
> +  %0 = call i32 @llvm.r600.read.tidig.x() #1
> +  %1 = call i32 @llvm.r600.read.tidig.y() #1
> +  %2 = sext i32 %0 to i64
> +  %3 = sext i32 %1 to i64
> +  br label %loop
> +
> +loop:
> +  %4 = phi i64 [0, %entry], [%5, %loop]
> +  %5 = add i64 %2, %4
> +  %6 = getelementptr i8 addrspace(1)* %in, i64 %5
> +  %7 = load i8 addrspace(1)* %6, align 1
> +  %8 = or i64 %5, 1
> +  %9 = getelementptr i8 addrspace(1)* %in, i64 %8
> +  %10 = load i8 addrspace(1)* %9, align 1
> +  %11 = add i8 %7, %10
> +  %12 = sext i8 %11 to i32
> +  store i32 %12, i32 addrspace(1)* %out
> +  %13 = icmp slt i64 %5, 10
> +  br i1 %13, label %loop, label %done
> +
> +done:
> +  ret void
> +}
> +
> +declare i32 @llvm.r600.read.tidig.x() #1
> +declare i32 @llvm.r600.read.tidig.y() #1
> +
> +attributes #1 = { nounwind readnone }
> -- 1.8.1.5
>
> 0003-R600-SI-Handle-MUBUF-instructions-in-SIInstrInfo-mov.patch
>
>
>  From 3e23edb6e49507bf2bf1da6d86c80ea13f1a0541 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 18 Mar 2014 20:58:27 -0400
> Subject: [PATCH 3/3] R600/SI: Handle MUBUF instructions in
>   SIInstrInfo::moveToVALU()
>
> ---
>   lib/Target/R600/AMDGPUTargetMachine.cpp |   3 +
>   lib/Target/R600/SIISelLowering.cpp      |   4 +-
>   lib/Target/R600/SIInstrFormats.td       |   1 +
>   lib/Target/R600/SIInstrInfo.cpp         | 135 +++++++++++++++++++++++++++++++-
>   lib/Target/R600/SIInstrInfo.h           |  10 +++
>   test/CodeGen/R600/salu-to-valu.ll       |   7 +-
>   6 files changed, 155 insertions(+), 5 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 7f50428..b11fce3 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -165,6 +165,9 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
>       addPass(createR600VectorRegMerger(*TM));
>     } else {
>       addPass(createSIFixSGPRCopiesPass(*TM));
> +    // SIFixSGPRCopies can generate a lot of duplicate instructions,
> +    // so we need to run MachineCSE afterwards.
> +    addPass(&MachineCSEID);
>     }
>     return false;
>   }
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 52e5a16..fd1e3a6 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -25,8 +25,6 @@
>   #include "llvm/CodeGen/SelectionDAG.h"
>   #include "llvm/IR/Function.h"
>   
> -const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
> -
>   using namespace llvm;
>   
>   SITargetLowering::SITargetLowering(TargetMachine &TM) :
> @@ -407,7 +405,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
>       BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
>               .addImm(0);
>       BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
> -            .addImm(RSRC_DATA_FORMAT >> 32);
> +            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
>       BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
>               .addReg(SubRegHiLo)
>               .addImm(AMDGPU::sub0)
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 53ebaaf..aa2c22c 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -369,6 +369,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
>     let EXP_CNT = 1;
>   
>     let neverHasSideEffects = 1;
> +  let UseNamedOperandTable = 1;
>   }
>   
>   class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 0401f25..68332ae 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -555,6 +555,31 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
>     MO.ChangeToRegister(Reg, false);
>   }
>   
> +unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
> +                                         MachineRegisterInfo &MRI,
> +                                         MachineOperand &SuperReg,
> +                                         const TargetRegisterClass *SuperRC,
> +                                         unsigned SubIdx,
> +                                         const TargetRegisterClass *SubRC)
> +                                         const {
> +  assert(SuperReg.isReg());
> +
> +  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
> +  unsigned SubReg = MRI.createVirtualRegister(SubRC);
> +
> +  // Just in case the super register is itself a sub-register, copy it to a new
> +  // value so we don't need to worry about merging its subreg index with the
> +  // SubIdx passed to this function.  The register coalescer should be able to
> +  // eliminate this extra copy.
> +  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::COPY),
> +          NewSuperReg)
> +          .addOperand(SuperReg);
> +
> +  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::COPY), SubReg)
> +          .addReg(NewSuperReg, 0, SubIdx);
> +  return SubReg;
> +}

Can you use TargetOpcode::COPY instead (and the same for the others)?
It's the same opcode, but it makes it clearer that this isn't a
target-specific instruction, and it makes these uses easier to find when
searching for examples of how to use the special instructions.
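
Concretely, something like this (a sketch; same behavior, just the
target-independent TargetOpcode:: enumerator instead of the AMDGPU:: alias):

    // Copy the (possibly sub-registered) super register into a fresh vreg
    // of the requested super class.
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
            get(TargetOpcode::COPY), NewSuperReg)
            .addOperand(SuperReg);

    // Then pull out the requested piece with a sub-register copy.
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
            get(TargetOpcode::COPY), SubReg)
            .addReg(NewSuperReg, 0, SubIdx);
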

> +
>   void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
>     MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
>     int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> @@ -672,6 +697,110 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
>         MI->getOperand(i).setReg(DstReg);
>       }
>     }
> +
> +  // Legalize MUBUF* instructions
> +  // FIXME: If we start using the non-addr64 instructions for compute, we
> +  // may need to legalize them here.
> +
> +  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> +                                            AMDGPU::OpName::srsrc);
> +  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> +                                             AMDGPU::OpName::vaddr);
> +  if (SRsrcIdx != -1 && VAddrIdx != -1) {
> +    const TargetRegisterClass *VAddrRC =
> +        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
> +
> +    if(VAddrRC->getSize() == 8 &&
> +       MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
> +      // We have a MUBUF instruction that uses a 64-bit vaddr register and
> +      // srsrc has the incorrect register class.  In order to fix this, we
> +      // need to extract the pointer from the resource descriptor (srsrc),
> +      // add it to the value of vaddr, then store the result in the vaddr
> +      // operand.  Then, we need to set the pointer field of the resource
> +      // descriptor to zero.
> +
> +      MachineBasicBlock &MBB = *MI->getParent();
> +      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
> +      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
> +      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
> +      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> +      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> +      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
> +      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> +      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> +      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> +      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
> +
> +      // SRsrcPtrLo = srsrc:sub0
> +      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
> +          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
> +
> +      // SRsrcPtrHi = srsrc:sub1
> +      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
> +          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
> +
> +      // VAddrLo = vaddr:sub0
> +      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
> +          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
> +
> +      // VAddrHi = vaddr:sub1
> +      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
> +          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
> +
> +      // NewVaddrLo = SRsrcPtrLo + VAddrLo
> +      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
> +              NewVAddrLo)
> +              .addReg(SRsrcPtrLo)
> +              .addReg(VAddrLo)
> +              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
> +
> +      // NewVaddrHi = SRsrcPtrHi + VAddrHi
> +      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
> +              NewVAddrHi)
> +              .addReg(SRsrcPtrHi)
> +              .addReg(VAddrHi)
> +              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
> +              .addReg(AMDGPU::VCC, RegState::Implicit);
> +
> +      // NewVaddr = {NewVaddrHi, NewVaddrLo}
> +      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
> +              NewVAddr)
> +              .addReg(NewVAddrLo)
> +              .addImm(AMDGPU::sub0)
> +              .addReg(NewVAddrHi)
> +              .addImm(AMDGPU::sub1);
> +
> +      // Zero64 = 0
> +      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
> +              Zero64)
> +              .addImm(0);
> +
> +      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
> +      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
> +              SRsrcFormatLo)
> +              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
> +
> +      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
> +      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
> +              SRsrcFormatHi)
> +              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
> +
> +      // NewSRsrc = {Zero64, SRsrcFormat}
> +      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
> +              NewSRsrc)
> +              .addReg(Zero64)
> +              .addImm(AMDGPU::sub0_sub1)
> +              .addReg(SRsrcFormatLo)
> +              .addImm(AMDGPU::sub2)
> +              .addReg(SRsrcFormatHi)
> +              .addImm(AMDGPU::sub3);
> +
> +      // Update the instruction to use NewVaddr
> +      MI->getOperand(VAddrIdx).setReg(NewVAddr);
> +      // Update the instruction to use NewSRsrc
> +      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
> +    }
> +  }
>   }
>   
>   void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
> @@ -736,8 +865,12 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
>       }
>   
>       unsigned NewOpcode = getVALUOp(*Inst);
> -    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
> +    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
> +      // We cannot move this instruction to the VALU, so we should try to
> +      // legalize its operands instead.
> +      legalizeOperands(Inst);
>         continue;
> +    }
>   
>       // Use the new VALU Opcode.
>       const MCInstrDesc &NewDesc = get(NewOpcode);
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index bff89d3..96df7c4 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -31,6 +31,13 @@ private:
>                                                unsigned MovRelOp,
>                                                unsigned Dst,
>                                                unsigned Src0) const;
> +
> +  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
> +                              MachineRegisterInfo &MRI,
> +                              MachineOperand &SuperReg,
> +                              const TargetRegisterClass *SuperRC,
> +                              unsigned SubIdx,
> +                              const TargetRegisterClass *SubRC) const;
>   public:
>     explicit SIInstrInfo(AMDGPUTargetMachine &tm);
>   
> @@ -148,6 +155,9 @@ namespace AMDGPU {
>     int getCommuteRev(uint16_t Opcode);
>     int getCommuteOrig(uint16_t Opcode);
>   
> +  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
> +
> +
>   } // End namespace AMDGPU
>   
>   } // End namespace llvm
> diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
> index c989c9d..b2aa534 100644
> --- a/test/CodeGen/R600/salu-to-valu.ll
> +++ b/test/CodeGen/R600/salu-to-valu.ll
> @@ -1,4 +1,4 @@
> -; RUN: llc < %s -march=r600 -mcpu=SI  | FileCheck %s
> +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
>   
>   ; In this test both the pointer and the offset operands to the
>   ; BUFFER_LOAD instructions end up being stored in vgprs.  This
> @@ -8,8 +8,13 @@
>   ; (low 64-bits of srsrc).
>   
>   ; CHECK-LABEL: @mubuf
> +
>   ; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
>   ; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
> +
> +; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
> +; instructions
> +; CHECK-NOT: BUFFER_LOAD_UBYTE v{{[0-9]+}}, v
>   define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
>   entry:
>     %0 = call i32 @llvm.r600.read.tidig.x() #1

I think it would be better to use two positive checks here, one for an SGPR
source and one for an immediate, rather than the negative "not v" pattern.
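
Roughly something like the following -- the exact operand order and register
patterns depend on what the SI assembly printer actually emits, so treat
these as a sketch of the idea rather than the final test lines:

    ; CHECK: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
    ; CHECK: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], 0
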


-Matt