PATCH: R600/SI: Add ComplexPattern to match MUBUF variant with no VADDR
Matt Arsenault
Matthew.Arsenault at amd.com
Wed Aug 6 15:17:47 PDT 2014
On 08/06/2014 11:29 AM, Tom Stellard wrote:
> Hi,
>
> The attached patches add a ComplexPattern that will enable a no-vaddr
> variant of MUBUF instructions to be used for loads and stores with either
> no offset, or an immediate offset that can be folded into the instruction.
> This change will save us 2 VGPRs in these scenarios.
>
> -Tom
>
> 0001-R600-SI-Clear-lds-bit-on-MUBUF-instructions-used-for.patch
>
>
> From 947090335ef64e7894a5f94ca2dd42ecf8757048 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Wed, 6 Aug 2014 11:42:14 -0400
> Subject: [PATCH 1/4] R600/SI: Clear lds bit on MUBUF instructions used for
> private stores
>
> This bit was left uninitialized, which was causing some random failures
> of piglit tests.
>
> NOTE: This is a candidate for the 3.5 branch.
> ---
> lib/Target/R600/SIInstrInfo.td | 1 +
> test/CodeGen/R600/private-memory.ll | 16 ++++++++--------
> 2 files changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index d2bce85..7bc8705 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -974,6 +974,7 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
> name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#"$glc"#"$slc"#"$tfe",
> []
> > {
> + let lds = 0;
> let addr64 = 0;
> }
Should the base class MUBUF set this instead?
>
> diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
> index a69ca21..b0f9c98 100644
> --- a/test/CodeGen/R600/private-memory.ll
> +++ b/test/CodeGen/R600/private-memory.ll
> @@ -1,6 +1,6 @@
> ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
> -; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
> -; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
> +; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
> +; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
>
> declare i32 @llvm.r600.read.tidig.x() nounwind readnone
>
> @@ -16,8 +16,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
> ; SI-PROMOTE: DS_READ_B32
> ; SI-PROMOTE: DS_READ_B32
>
> -; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
> -; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
> +; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
> +; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
> define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
> entry:
> %stack = alloca [5 x i32], align 4
> @@ -116,8 +116,8 @@ for.end:
>
> ; R600: MOVA_INT
>
> -; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
> -; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
> +; SI-PROMOTE-DAG: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
> +; SI-PROMOTE-DAG: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x2 ; encoding: [0x02,0x10,0x68,0xe0
> ; SI_PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
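Another broken SI_PROMOTE check line: the prefix passed to FileCheck is SI-PROMOTE (with a hyphen), so this check line is never matched.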
> ---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 147 ++++++++++++++++++----------
> lib/Target/R600/SIInstrInfo.cpp | 174 +++++++++++++++++++--------------
> lib/Target/R600/SIInstrInfo.h | 1 +
> lib/Target/R600/SIInstrInfo.td | 37 ++++++-
> test/CodeGen/R600/ctpop.ll | 4 +-
> test/CodeGen/R600/ctpop64.ll | 2 +-
> test/CodeGen/R600/extload.ll | 12 +--
> test/CodeGen/R600/mubuf.ll | 12 +--
> test/CodeGen/R600/sext-in-reg.ll | 6 +-
> test/CodeGen/R600/trunc.ll | 4 +-
> test/CodeGen/R600/zero_extend.ll | 2 +-
> 11 files changed, 256 insertions(+), 145 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index 090fd1d..88390e8 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -88,13 +88,16 @@ private:
> SDValue& Offset);
> bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
> bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
> - bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
> - SDValue &ImmOffset) const;
> + void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
> + SDValue &SOffset, SDValue &Offset, SDValue &Offen,
> + SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
> + SDValue &TFE) const;
> + bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
> + SDValue &Offset) const;
> bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
> SDValue &SOffset, SDValue &ImmOffset) const;
> - bool SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
> - SDValue &SOffset, SDValue &Offset, SDValue &Offen,
> - SDValue &Idxen, SDValue &GLC, SDValue &SLC,
> + bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
> + SDValue &Offset, SDValue &GLC, SDValue &SLC,
> SDValue &TFE) const;
> bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
> bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
> @@ -747,11 +750,23 @@ static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
> return isUInt<12>(Imm->getZExtValue());
> }
>
> -bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
> - SDValue &Offset,
> - SDValue &ImmOffset) const {
> +void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
> + SDValue &VAddr, SDValue &SOffset,
> + SDValue &Offset, SDValue &Offen,
> + SDValue &Idxen, SDValue &Addr64,
> + SDValue &GLC, SDValue &SLC,
> + SDValue &TFE) const {
> SDLoc DL(Addr);
>
> + GLC = CurDAG->getTargetConstant(0, MVT::i1);
> + SLC = CurDAG->getTargetConstant(0, MVT::i1);
> + TFE = CurDAG->getTargetConstant(0, MVT::i1);
> +
> + Idxen = CurDAG->getTargetConstant(0, MVT::i1);
> + Offen = CurDAG->getTargetConstant(0, MVT::i1);
> + Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
> + SOffset = CurDAG->getTargetConstant(0, MVT::i32);
> +
> if (CurDAG->isBaseWithConstantOffset(Addr)) {
> SDValue N0 = Addr.getOperand(0);
> SDValue N1 = Addr.getOperand(1);
> @@ -760,59 +775,88 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
> if (isLegalMUBUFImmOffset(C1)) {
>
> if (N0.getOpcode() == ISD::ADD) {
> - // (add (add N2, N3), C1)
> + // (add (add N2, N3), C1) -> addr64
> SDValue N2 = N0.getOperand(0);
> SDValue N3 = N0.getOperand(1);
> - Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
> - Offset = N3;
> - ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
> - return true;
> + Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
> + Ptr = N2;
> + VAddr = N3;
> + Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
> + return;
> }
>
> - // (add N0, C1)
> - Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));;
> - Offset = N0;
> - ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
> - return true;
> + // (add N0, C1) -> offset
> + VAddr = CurDAG->getTargetConstant(0, MVT::i32);
> + Ptr = N0;
> + Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
> + return;
> }
> }
> if (Addr.getOpcode() == ISD::ADD) {
> - // (add N0, N1)
> + // (add N0, N1) -> addr64
> SDValue N0 = Addr.getOperand(0);
> SDValue N1 = Addr.getOperand(1);
> - Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
> - Offset = N1;
> - ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
> - return true;
> + Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
> + Ptr = N0;
> + VAddr = N1;
> + Offset = CurDAG->getTargetConstant(0, MVT::i16);
> + return;
> }
>
> - // default case
> - Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
> - Offset = Addr;
> - ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
> - return true;
> + // default case -> offset
> + VAddr = CurDAG->getTargetConstant(0, MVT::i32);
> + Ptr = Addr;
> + Offset = CurDAG->getTargetConstant(0, MVT::i16);
> +
> }
>
> -/// \brief Return a resource descriptor with the 'Add TID' bit enabled
> -/// The TID (Thread ID) is multipled by the stride value (bits [61:48]
> -/// of the resource descriptor) to create an offset, which is added to the
> -/// resource ponter.
> -static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
> +bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
> + SDValue &VAddr,
> + SDValue &Offset) const {
> + SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;
>
> - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
> - 0xffffffff;
> + SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
> + GLC, SLC, TFE);
> +
> + ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
> + if (C->getSExtValue()) {
> + SDLoc DL(Addr);
> + SRsrc = wrapAddr64Rsrc(CurDAG, DL, Ptr);
> + return true;
> + }
> + return false;
> +}
> +
> +static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
> + uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
>
> SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
> SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
> + if (RsrcDword1)
> + PtrHi = SDValue(DAG->getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
> + DAG->getConstant(RsrcDword1, MVT::i32)), 0);
> +
> SDValue DataLo = DAG->getTargetConstant(
> - Rsrc & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
> - SDValue DataHi = DAG->getTargetConstant(Rsrc >> 32, MVT::i32);
> + RsrcDword2And3 & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
> + SDValue DataHi = DAG->getTargetConstant(RsrcDword2And3 >> 32, MVT::i32);
>
> const SDValue Ops[] = { PtrLo, PtrHi, DataLo, DataHi };
> return SDValue(DAG->getMachineNode(AMDGPU::SI_BUFFER_RSRC, DL,
> MVT::v4i32, Ops), 0);
> }
>
> +/// \brief Return a resource descriptor with the 'Add TID' bit enabled
> +/// The TID (Thread ID) is multipled by the stride value (bits [61:48]
> +/// of the resource descriptor) to create an offset, which is added to the
> +/// resource ponter.
> +static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
> +
> + uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
> + 0xffffffff; // Size
> +
> + return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
> +}
> +
> bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
> SDValue &VAddr, SDValue &SOffset,
> SDValue &ImmOffset) const {
> @@ -866,20 +910,25 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
> return true;
> }
>
> -bool AMDGPUDAGToDAGISel::SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc,
> - SDValue &VAddr, SDValue &SOffset,
> - SDValue &Offset, SDValue &Offen,
> - SDValue &Idxen, SDValue &GLC,
> - SDValue &SLC, SDValue &TFE) const {
> +bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
> + SDValue &SOffset, SDValue &Offset,
> + SDValue &GLC, SDValue &SLC,
> + SDValue &TFE) const {
> + SDValue Ptr, VAddr, Offen, Idxen, Addr64;
>
> - GLC = CurDAG->getTargetConstant(0, MVT::i1);
> - SLC = CurDAG->getTargetConstant(0, MVT::i1);
> - TFE = CurDAG->getTargetConstant(0, MVT::i1);
> + SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
> + GLC, SLC, TFE);
>
> - Idxen = CurDAG->getTargetConstant(0, MVT::i1);
> - Offen = CurDAG->getTargetConstant(1, MVT::i1);
> -
> - return SelectMUBUFScratch(Addr, SRsrc, VAddr, SOffset, Offset);
> + if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
> + !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
> + !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
> + uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
> + APInt::getAllOnesValue(32).getZExtValue(); // Size
> + SDLoc DL(Addr);
> + SRsrc = buildRSRC(CurDAG, DL, Ptr, 0, Rsrc);
> + return true;
> + }
> + return false;
> }
>
> bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 3d9714e..f5a12c5 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -1086,50 +1086,76 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
> // Legalize MUBUF* instructions
> // FIXME: If we start using the non-addr64 instructions for compute, we
> // may need to legalize them here.
> + int SRsrcIdx =
> + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
> + if (SRsrcIdx != -1) {
> + // We have an MUBUF instruction
> + MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
> + unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
> + if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
> + RI.getRegClass(SRsrcRC)))
> + // The operands are legal.
> + // FIXME: We may need to legalize operands besided srsrc.
> + return;
Need braces here: the body of this if spans several lines (two comment lines plus the return).
>
> - int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> - AMDGPU::OpName::srsrc);
> - int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> - AMDGPU::OpName::vaddr);
> - if (SRsrcIdx != -1 && VAddrIdx != -1) {
> - const TargetRegisterClass *VAddrRC =
> - RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
> -
> - if(VAddrRC->getSize() == 8 &&
> - MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
> - // We have a MUBUF instruction that uses a 64-bit vaddr register and
> - // srsrc has the incorrect register class. In order to fix this, we
> - // need to extract the pointer from the resource descriptor (srsrc),
> - // add it to the value of vadd, then store the result in the vaddr
> - // operand. Then, we need to set the pointer field of the resource
> - // descriptor to zero.
> + MachineBasicBlock &MBB = *MI->getParent();
> + // Extract the the ptr from the resource descriptor.
>
> - MachineBasicBlock &MBB = *MI->getParent();
> - MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
> - MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
> - unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
> - unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> - unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> - unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
> - unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> - unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> - unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> - unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
> -
> - // SRsrcPtrLo = srsrc:sub0
> - SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
> - &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
> -
> - // SRsrcPtrHi = srsrc:sub1
> - SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
> - &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
> + // SRsrcPtrLo = srsrc:sub0
> + unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
> + &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
> +
> + // SRsrcPtrHi = srsrc:sub1
> + unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
> + &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
> +
> + // Create an empty resource descriptor
> + unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
> + unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> + unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> + unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
> +
> + // Zero64 = 0
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
> + Zero64)
> + .addImm(0);
> +
> + // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
> + SRsrcFormatLo)
> + .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
> +
> + // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
> + SRsrcFormatHi)
> + .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
> +
> + // NewSRsrc = {Zero64, SRsrcFormat}
> + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
> + NewSRsrc)
> + .addReg(Zero64)
> + .addImm(AMDGPU::sub0_sub1)
> + .addReg(SRsrcFormatLo)
> + .addImm(AMDGPU::sub2)
> + .addReg(SRsrcFormatHi)
> + .addImm(AMDGPU::sub3);
> +
> + MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
> + unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
> + unsigned NewVAddrLo;
> + unsigned NewVAddrHi;
> + if (VAddr) {
> + // This is already an ADDR64 instruction so we need to add the pointer
> + // extracted from the resource descriptor to the current value of VAddr.
> + NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> + NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
>
> // VAddrLo = vaddr:sub0
> - VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
> + unsigned VAddrLo = buildExtractSubReg(MI, MRI, *VAddr,
> &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
I don't think you need to use buildExtractSubReg here. You should be able to
specify the subreg index directly in the call to .addReg.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140806/42c8023e/attachment.html>
More information about the llvm-commits
mailing list