<html>
  <head>
    <meta content="text/html; charset=ISO-8859-1"
      http-equiv="Content-Type">
  </head>
  <body bgcolor="#FFFFFF" text="#000000">
    <div class="moz-cite-prefix">On 03/18/2014 02:38 PM, Tom Stellard
      wrote:<br>
    </div>
    <blockquote cite="mid:20140318213813.GE2169@yyz" type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">Hi,

The attached patches teach SIInstrInfo::moveToVALU() how to handle MUBUF
instructions.  These instructions are already executed by the VALU, but
we sometimes need to legalize the srsrc operand if its defining instruction
has been moved from the SALU to the VALU.

-Tom
</pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0001-R600-SI-Use-SGPR_-32-64-reg-clases-when-lowering-SI_.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From 43de42a6982230dcc7aa729da60506de94016998 Mon Sep 17 00:00:00 2001
From: Tom Stellard <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:thomas.stellard@amd.com">&lt;thomas.stellard@amd.com&gt;</a>
Date: Tue, 18 Mar 2014 20:59:34 -0400
Subject: [PATCH 1/3] R600/SI: Use SGPR_(32|64) reg clases when lowering
 SI_ADDR64_RSRC

The SReg_(32|64) register classes contain special registers in addition
to the numbered SGPRs.  This can lead to machine verifier errors when
these register classes are used as sub-registers for SReg_128, since
SReg_128 only uses the numbered SGPRs.

Replacing SReg_(32|64) with SGPR_(32|64) fixes this problem, since
the SGPR_(32|64) register classes contain only numbered SGPRs.

Test cases for this are coming in a later commit.
---
 lib/Target/R600/SIISelLowering.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 8cf1b82..52e5a16 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -398,10 +398,10 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
       static_cast&lt;const SIInstrInfo*&gt;(getTargetMachine().getInstrInfo());
     MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
     unsigned SuperReg = MI->getOperand(0).getReg();
-    unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-    unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-    unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-    unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
+    unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
+    unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+    unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
     BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
             .addOperand(MI->getOperand(1));
     BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
<div class="moz-txt-sig">-- 
1.8.1.5

</div></pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0002-R600-SI-Handle-S_MOV_B64-in-SIInstrInfo-moveToVALU.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From f6694e5ba5a50b135950a29c66fea4664210c750 Mon Sep 17 00:00:00 2001
From: Tom Stellard <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:thomas.stellard@amd.com">&lt;thomas.stellard@amd.com&gt;</a>
Date: Tue, 18 Mar 2014 21:54:07 -0400
Subject: [PATCH 2/3] R600/SI: Handle S_MOV_B64 in SIInstrInfo::moveToVALU()

---
 lib/Target/R600/SIInstrInfo.cpp   | 57 +++++++++++++++++++++++++++++++++++++--
 test/CodeGen/R600/salu-to-valu.ll | 42 +++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 test/CodeGen/R600/salu-to-valu.ll

diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 3ed8dfa..0401f25 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -680,12 +680,65 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
 
   while (!Worklist.empty()) {
     MachineInstr *Inst = Worklist.pop_back_val();
+    MachineBasicBlock *MBB = Inst->getParent();
+    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+    // Handle some special cases
+    switch(Inst->getOpcode()) {
+      case AMDGPU::S_MOV_B64: {
+        DebugLoc DL = Inst->getDebugLoc();
+
+        // If the source operand is a register we can replace this with a
+        // copy
+        if (Inst->getOperand(1).isReg()) {
+          MachineInstr *Copy = BuildMI(*MBB, Inst, DL,
+                                       get(AMDGPU::COPY))
+                                       .addOperand(Inst->getOperand(0))
+                                       .addOperand(Inst->getOperand(1));
+          Worklist.push_back(Copy);
+        } else {
+          // Otherwise, we need to split this into two movs, because there is
+          // no 64-bit VALU move instruction.
+          unsigned LoSrc, HiSrc, LoDst, HiDst, Dst;
+          LoSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+          LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+          HiSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+          HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+          Dst = MRI.createVirtualRegister(
+              MRI.getRegClass(Inst->getOperand(0).getReg()));
+
+          BuildMI(*MBB, Inst, DL, get(AMDGPU::EXTRACT_SUBREG), LoSrc)
+                  .addOperand(Inst->getOperand(1))
+                  .addImm(AMDGPU::sub0);
+          BuildMI(*MBB, Inst, DL, get(AMDGPU::EXTRACT_SUBREG), HiSrc)
+                  .addOperand(Inst->getOperand(1))
+                  .addImm(AMDGPU::sub1);
+          MachineInstr *Lo = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
+                                     LoDst)
+                                    .addReg(LoSrc);
+          MachineInstr *Hi = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
+                                     HiDst)
+                                     .addReg(HiSrc);
+
+          BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), Dst)
+                  .addReg(LoDst)
+                  .addImm(AMDGPU::sub0)
+                  .addReg(HiDst)
+                  .addImm(AMDGPU::sub1);
+
+          MRI.replaceRegWith(Inst->getOperand(0).getReg(), Dst);
+          Worklist.push_back(Lo);
+          Worklist.push_back(Hi);
+        }
+        Inst->eraseFromParent();
+        continue;
+      }
+    }
+
     unsigned NewOpcode = getVALUOp(*Inst);
     if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
       continue;
 
-    MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
-
     // Use the new VALU Opcode.
     const MCInstrDesc &NewDesc = get(NewOpcode);
     Inst->setDesc(NewDesc);
diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
new file mode 100644
index 0000000..c989c9d
--- /dev/null
+++ b/test/CodeGen/R600/salu-to-valu.ll
@@ -0,0 +1,42 @@
+; RUN: llc &lt; %s -march=r600 -mcpu=SI  | FileCheck %s
+
+; In this test both the pointer and the offset operands to the
+; BUFFER_LOAD instructions end up being stored in vgprs.  This
+; requires us to add the pointer and offset together, store the
+; result in the offset operand (vaddr), and then store 0 in an
+; sgpr register pair and use that for the pointer operand
+; (low 64-bits of srsrc).
+
+; CHECK-LABEL: @mubuf
+; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
+; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
+define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+entry:
+  %0 = call i32 @llvm.r600.read.tidig.x() #1
+  %1 = call i32 @llvm.r600.read.tidig.y() #1
+  %2 = sext i32 %0 to i64
+  %3 = sext i32 %1 to i64
+  br label %loop
+
+loop:
+  %4 = phi i64 [0, %entry], [%5, %loop]
+  %5 = add i64 %2, %4
+  %6 = getelementptr i8 addrspace(1)* %in, i64 %5
+  %7 = load i8 addrspace(1)* %6, align 1
+  %8 = or i64 %5, 1
+  %9 = getelementptr i8 addrspace(1)* %in, i64 %8
+  %10 = load i8 addrspace(1)* %9, align 1
+  %11 = add i8 %7, %10
+  %12 = sext i8 %11 to i32
+  store i32 %12, i32 addrspace(1)* %out
+  %13 = icmp slt i64 %5, 10
+  br i1 %13, label %loop, label %done
+
+done:
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.r600.read.tidig.y() #1
+
+attributes #1 = { nounwind readnone }
<div class="moz-txt-sig">-- 
1.8.1.5

</div></pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0003-R600-SI-Handle-MUBUF-instructions-in-SIInstrInfo-mov.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From 3e23edb6e49507bf2bf1da6d86c80ea13f1a0541 Mon Sep 17 00:00:00 2001
From: Tom Stellard <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:thomas.stellard@amd.com">&lt;thomas.stellard@amd.com&gt;</a>
Date: Tue, 18 Mar 2014 20:58:27 -0400
Subject: [PATCH 3/3] R600/SI: Handle MUBUF instructions in
 SIInstrInfo::moveToVALU()

---
 lib/Target/R600/AMDGPUTargetMachine.cpp |   3 +
 lib/Target/R600/SIISelLowering.cpp      |   4 +-
 lib/Target/R600/SIInstrFormats.td       |   1 +
 lib/Target/R600/SIInstrInfo.cpp         | 135 +++++++++++++++++++++++++++++++-
 lib/Target/R600/SIInstrInfo.h           |  10 +++
 test/CodeGen/R600/salu-to-valu.ll       |   7 +-
 6 files changed, 155 insertions(+), 5 deletions(-)

diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 7f50428..b11fce3 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -165,6 +165,9 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
     addPass(createR600VectorRegMerger(*TM));
   } else {
     addPass(createSIFixSGPRCopiesPass(*TM));
+    // SIFixSGPRCopies can generate a lot of duplicate instructions,
+    // so we need to run MachineCSE afterwards.
+    addPass(&MachineCSEID);
   }
   return false;
 }
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 52e5a16..fd1e3a6 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -25,8 +25,6 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/Function.h"
 
-const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
-
 using namespace llvm;
 
 SITargetLowering::SITargetLowering(TargetMachine &TM) :
@@ -407,7 +405,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
             .addImm(0);
     BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
-            .addImm(RSRC_DATA_FORMAT >> 32);
+            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
     BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
             .addReg(SubRegHiLo)
             .addImm(AMDGPU::sub0)
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 53ebaaf..aa2c22c 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -369,6 +369,7 @@ class MUBUF &lt;bits&lt;7&gt; op, dag outs, dag ins, string asm, list&lt;dag&gt; pattern&gt; :
   let EXP_CNT = 1;
 
   let neverHasSideEffects = 1;
+  let UseNamedOperandTable = 1;
 }
 
 class MTBUF &lt;bits&lt;3&gt; op, dag outs, dag ins, string asm, list&lt;dag&gt; pattern&gt; :
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 0401f25..68332ae 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -555,6 +555,31 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
   MO.ChangeToRegister(Reg, false);
 }
 
+unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineOperand &SuperReg,
+                                         const TargetRegisterClass *SuperRC,
+                                         unsigned SubIdx,
+                                         const TargetRegisterClass *SubRC)
+                                         const {
+  assert(SuperReg.isReg());
+
+  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+  unsigned SubReg = MRI.createVirtualRegister(SubRC);
+
+  // Just in case the super register is itself a sub-register, copy it to a new
+  // value so we don't need to worry about merging its subreg index with the
+  // SubIdx passed to this function.  The register coalescer should be able to
+  // eliminate this extra copy.
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::COPY),
+          NewSuperReg)
+          .addOperand(SuperReg);
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::COPY), SubReg)
+          .addReg(NewSuperReg, 0, SubIdx);
+  return SubReg;
+}</pre>
      </div>
    </blockquote>
    <br>
    Can you use TargetOpcode::COPY instead (and the same for the
    others)? It's the same thing, but it makes it clearer that it's not
    target-specific. It also makes it easier to find when looking for
    examples of how to use those special instructions.<br>
    <br>
    <blockquote cite="mid:20140318213813.GE2169@yyz" type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">
+
 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
   MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
   int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
@@ -672,6 +697,110 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
       MI->getOperand(i).setReg(DstReg);
     }
   }
+
+  // Legalize MUBUF* instructions
+  // FIXME: If we start using the non-addr64 instructions for compute, we
+  // may need to legalize them here.
+
+  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+                                            AMDGPU::OpName::srsrc);
+  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+                                             AMDGPU::OpName::vaddr);
+  if (SRsrcIdx != -1 && VAddrIdx != -1) {
+    const TargetRegisterClass *VAddrRC =
+        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
+
+    if(VAddrRC->getSize() == 8 &&
+       MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
+      // We have a MUBUF instruction that uses a 64-bit vaddr register and
+      // srsrc has the incorrect register class.  In order to fix this, we
+      // need to extract the pointer from the resource descriptor (srsrc),
+      // add it to the value of vaddr, then store the result in the vaddr
+      // operand.  Then, we need to set the pointer field of the resource
+      // descriptor to zero.
+
+      MachineBasicBlock &MBB = *MI->getParent();
+      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
+      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
+      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
+      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+      // SRsrcPtrLo = srsrc:sub0
+      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
+          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+      // SRsrcPtrHi = srsrc:sub1
+      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
+          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+      // VAddrLo = vaddr:sub0
+      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
+          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+      // VAddrHi = vaddr:sub1
+      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
+          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+      // NewVaddrLo = SRsrcPtrLo + VAddrLo
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
+              NewVAddrLo)
+              .addReg(SRsrcPtrLo)
+              .addReg(VAddrLo)
+              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
+
+      // NewVaddrHi = SRsrcPtrHi + VAddrHi
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
+              NewVAddrHi)
+              .addReg(SRsrcPtrHi)
+              .addReg(VAddrHi)
+              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
+              .addReg(AMDGPU::VCC, RegState::Implicit);
+
+      // NewVaddr = {NewVaddrHi, NewVaddrLo}
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+              NewVAddr)
+              .addReg(NewVAddrLo)
+              .addImm(AMDGPU::sub0)
+              .addReg(NewVAddrHi)
+              .addImm(AMDGPU::sub1);
+
+      // Zero64 = 0
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+              Zero64)
+              .addImm(0);
+
+      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+              SRsrcFormatLo)
+              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+
+      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+              SRsrcFormatHi)
+              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+
+      // NewSRsrc = {Zero64, SRsrcFormat}
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+              NewSRsrc)
+              .addReg(Zero64)
+              .addImm(AMDGPU::sub0_sub1)
+              .addReg(SRsrcFormatLo)
+              .addImm(AMDGPU::sub2)
+              .addReg(SRsrcFormatHi)
+              .addImm(AMDGPU::sub3);
+
+      // Update the instruction to use NewVaddr
+      MI->getOperand(VAddrIdx).setReg(NewVAddr);
+      // Update the instruction to use NewSRsrc
+      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
+    }
+  }
 }
 
 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
@@ -736,8 +865,12 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
     }
 
     unsigned NewOpcode = getVALUOp(*Inst);
-    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+      // We cannot move this instruction to the VALU, so we should try to
+      // legalize its operands instead.
+      legalizeOperands(Inst);
       continue;
+    }
 
     // Use the new VALU Opcode.
     const MCInstrDesc &NewDesc = get(NewOpcode);
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index bff89d3..96df7c4 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -31,6 +31,13 @@ private:
                                              unsigned MovRelOp,
                                              unsigned Dst,
                                              unsigned Src0) const;
+
+  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
+                              MachineRegisterInfo &MRI,
+                              MachineOperand &SuperReg,
+                              const TargetRegisterClass *SuperRC,
+                              unsigned SubIdx,
+                              const TargetRegisterClass *SubRC) const;
 public:
   explicit SIInstrInfo(AMDGPUTargetMachine &tm);
 
@@ -148,6 +155,9 @@ namespace AMDGPU {
   int getCommuteRev(uint16_t Opcode);
   int getCommuteOrig(uint16_t Opcode);
 
+  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
+
+
 } // End namespace AMDGPU
 
 } // End namespace llvm
diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
index c989c9d..b2aa534 100644
--- a/test/CodeGen/R600/salu-to-valu.ll
+++ b/test/CodeGen/R600/salu-to-valu.ll
@@ -1,4 +1,4 @@
-; RUN: llc &lt; %s -march=r600 -mcpu=SI  | FileCheck %s
+; RUN: llc &lt; %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
 
 ; In this test both the pointer and the offset operands to the
 ; BUFFER_LOAD instructions end up being stored in vgprs.  This
@@ -8,8 +8,13 @@
 ; (low 64-bits of srsrc).
 
 ; CHECK-LABEL: @mubuf
+
 ; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
 ; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
+
+; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
+; instructions
+; CHECK-NOT: BUFFER_LOAD_UBYTE v{{[0-9]+}}, v
 define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.x() #1
</pre>
      </div>
    </blockquote>
    <br>
    I think it would be better if there were two positive checks, one for
    an SGPR and one for an immediate, rather than a CHECK-NOT on v.<br>
    <br>
    <br>
    -Matt<br>
  </body>
</html>