[llvm] r282999 - AMDGPU: Don't use offen if it is 0

Mehdi Amini via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 30 19:45:47 PDT 2016


Hi Matt,

Tests are not passing:
 http://lab.llvm.org:8011/builders/clang-x86_64-linux-selfhost-modules/builds/20038
 http://lab.llvm.org:8011/builders/clang-with-lto-ubuntu/builds/587

I reverted in r283003.

— 
Mehdi


> On Sep 30, 2016, at 6:37 PM, Matt Arsenault via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> 
> Author: arsenm
> Date: Fri Sep 30 20:37:15 2016
> New Revision: 282999
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=282999&view=rev
> Log:
> AMDGPU: Don't use offen if it is 0
> 
> This removes many re-initializations of a base register to 0.
> 
> Modified:
>    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
>    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
>    llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
>    llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll
>    llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
>    llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll
>    llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
>    llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
>    llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll
>    llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll
>    llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
>    llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
> 
> Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Fri Sep 30 20:37:15 2016
> @@ -320,14 +320,82 @@ static unsigned getNumSubRegsForSpillOp(
>   }
> }
> 
> -void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
> -                                           unsigned LoadStoreOp,
> -                                           const MachineOperand *SrcDst,
> -                                           unsigned ScratchRsrcReg,
> -                                           unsigned ScratchOffset,
> -                                           int64_t Offset,
> -                                           RegScavenger *RS) const {
> +static int getOffsetMUBUFStore(unsigned Opc) {
> +  switch (Opc) {
> +  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
> +    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
> +  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
> +    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
> +  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
> +    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
> +  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
> +    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
> +  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
> +    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
> +  default:
> +    return -1;
> +  }
> +}
> 
> +static int getOffsetMUBUFLoad(unsigned Opc) {
> +  switch (Opc) {
> +  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
> +    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
> +  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
> +    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
> +  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
> +    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
> +  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
> +    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
> +  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
> +    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
> +  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
> +    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
> +  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
> +    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
> +  default:
> +    return -1;
> +  }
> +}
> +
> +// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
> +// need to handle the case where an SGPR may need to be spilled while spilling.
> +static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
> +                                      MachineFrameInfo &MFI,
> +                                      MachineBasicBlock::iterator MI,
> +                                      int Index,
> +                                      int64_t Offset) {
> +  MachineBasicBlock *MBB = MI->getParent();
> +  const DebugLoc &DL = MI->getDebugLoc();
> +  bool IsStore = MI->mayStore();
> +
> +  unsigned Opc = MI->getOpcode();
> +  int LoadStoreOp = IsStore ?
> +    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
> +  if (LoadStoreOp == -1)
> +    return false;
> +
> +  unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
> +
> +  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
> +    .addReg(Reg, getDefRegState(!IsStore))
> +    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
> +    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
> +    .addImm(Offset)
> +    .addImm(0) // glc
> +    .addImm(0) // slc
> +    .addImm(0) // tfe
> +    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
> +  return true;
> +}
> +
> +void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
> +                                         unsigned LoadStoreOp,
> +                                         const MachineOperand *SrcDst,
> +                                         unsigned ScratchRsrcReg,
> +                                         unsigned ScratchOffset,
> +                                         int64_t Offset,
> +                                         RegScavenger *RS) const {
>   unsigned Value = SrcDst->getReg();
>   bool IsKill = SrcDst->isKill();
>   MachineBasicBlock *MBB = MI->getParent();
> @@ -574,7 +642,7 @@ void SIRegisterInfo::eliminateFrameIndex
>     case AMDGPU::SI_SPILL_V96_SAVE:
>     case AMDGPU::SI_SPILL_V64_SAVE:
>     case AMDGPU::SI_SPILL_V32_SAVE:
> -      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
> +      buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
>             TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
>             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
>             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
> @@ -589,7 +657,7 @@ void SIRegisterInfo::eliminateFrameIndex
>     case AMDGPU::SI_SPILL_V128_RESTORE:
>     case AMDGPU::SI_SPILL_V256_RESTORE:
>     case AMDGPU::SI_SPILL_V512_RESTORE: {
> -      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
> +      buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
>             TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
>             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
>             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
> @@ -600,6 +668,24 @@ void SIRegisterInfo::eliminateFrameIndex
>     }
> 
>     default: {
> +      if (TII->isMUBUF(*MI)) {
> +        // Disable offen so we don't need a 0 vgpr base.
> +        assert(static_cast<int>(FIOperandNum) ==
> +               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> +                                          AMDGPU::OpName::vaddr));
> +
> +        int64_t Offset = FrameInfo.getObjectOffset(Index);
> +        int64_t OldImm
> +          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
> +        int64_t NewOffset = OldImm + Offset;
> +
> +        if (isUInt<12>(NewOffset) &&
> +            buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
> +          MI->eraseFromParent();
> +          break;
> +        }
> +      }
> +
>       int64_t Offset = FrameInfo.getObjectOffset(Index);
>       FIOp.ChangeToImmediate(Offset);
>       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
> 
> Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Fri Sep 30 20:37:15 2016
> @@ -240,11 +240,11 @@ public:
>   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
> 
> private:
> -  void buildScratchLoadStore(MachineBasicBlock::iterator MI,
> -                             unsigned LoadStoreOp, const MachineOperand *SrcDst,
> -                             unsigned ScratchRsrcReg, unsigned ScratchOffset,
> -                             int64_t Offset,
> -                             RegScavenger *RS) const;
> +  void buildSpillLoadStore(MachineBasicBlock::iterator MI,
> +                           unsigned LoadStoreOp, const MachineOperand *SrcDst,
> +                           unsigned ScratchRsrcReg, unsigned ScratchOffset,
> +                           int64_t Offset,
> +                           RegScavenger *RS) const;
> };
> 
> } // End namespace llvm
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll Fri Sep 30 20:37:15 2016
> @@ -227,8 +227,8 @@ for.end:
> 
> ; R600: MOVA_INT
> 
> -; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
> -; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:2 ; encoding: [0x02,0x10,0x68,0xe0
> +; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; encoding: [0x00,0x00,0x68,0xe0,
> +; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:2 ; encoding: [0x02,0x00,0x68,0xe0,
> ; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
> define void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
> entry:
> @@ -248,8 +248,11 @@ entry:
> 
> ; R600: MOVA_INT
> 
> -; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
> -; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0
> +; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; encoding:
> +; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:1 ; encoding:
> +
> +; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
> +; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0
> define void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
> entry:
>   %0 = alloca [2 x i8]
> @@ -262,14 +265,13 @@ entry:
>   %5 = sext i8 %4 to i32
>   store i32 %5, i32 addrspace(1)* %out
>   ret void
> -
> }
> 
> ; Test that two stack objects are not stored in the same register
> ; The second stack object should be in T3.X
> ; FUNC-LABEL: {{^}}no_overlap:
> -; R600_CHECK: MOV
> -; R600_CHECK: [[CHAN:[XYZW]]]+
> +; R600-CHECK: MOV
> +; R600-CHECK: [[CHAN:[XYZW]]]+
> ; R600-NOT: [[CHAN]]+
> ; SI: v_mov_b32_e32 v3
> define void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll Fri Sep 30 20:37:15 2016
> @@ -14,8 +14,7 @@ entry:
> 
> ; GCN-LABEL: {{^}}stored_fi_to_lds:
> ; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
> -; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
> -; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]]
> +; GCN: buffer_store_dword v{{[0-9]+}}, off,
> ; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
> ; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
> ; GCN: ds_write_b32  [[VLDSPTR]], [[ZERO0]]
> @@ -118,7 +117,7 @@ define void @stored_fi_to_fi() #0 {
> }
> 
> ; GCN-LABEL: {{^}}stored_fi_to_global:
> -; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
> +; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
> ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
> ; GCN: buffer_store_dword [[FI]]
> define void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
> @@ -152,18 +151,20 @@ define void @stored_fi_to_global_2_small
> }
> 
> ; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
> -; GCN: v_mov_b32_e32 [[VAL_0:v[0-9]+]], 0{{$}}
> ; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
> -; GCN: buffer_store_dword [[VAL_0]], [[BASE_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
> +; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
> 
> +; FIXME: Re-initialize
> ; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 0{{$}}
> -; GCN: v_add_i32_e32 [[BASE_1_OFF_0:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
> 
> -; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
> -; GCN: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 56, [[BASE_0_1]]
> -; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
> +; GCN-DAG: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
> +
> +
> +; GCN: v_add_i32_e32 [[BASE_1_OFF_2:v[0-9]+]], vcc, 56, [[BASE_0_1]]
> +; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> 
> -; GCN: buffer_store_dword [[BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
> +; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
> define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
>   %tmp0 = alloca [4096 x i32]
>   %tmp1 = alloca [4096 x i32]
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll Fri Sep 30 20:37:15 2016
> @@ -126,8 +126,8 @@ done:
> 
> ; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
> ; GCN: s_and_saveexec_b64
> -; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
> -; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
> +; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
> +; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
> ; GCN: {{^}}BB4_2:
> define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
> entry:
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll Fri Sep 30 20:37:15 2016
> @@ -2,7 +2,7 @@
> ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> 
> ; FUNC-LABEL: {{^}}load_i8_sext_private:
> -; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
> +; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
> define void @load_i8_sext_private(i32 addrspace(1)* %out) {
> entry:
>   %tmp0 = alloca i8
> @@ -13,7 +13,7 @@ entry:
> }
> 
> ; FUNC-LABEL: {{^}}load_i8_zext_private:
> -; SI: buffer_load_ubyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
> +; SI: buffer_load_ubyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
> define void @load_i8_zext_private(i32 addrspace(1)* %out) {
> entry:
>   %tmp0 = alloca i8
> @@ -24,7 +24,7 @@ entry:
> }
> 
> ; FUNC-LABEL: {{^}}load_i16_sext_private:
> -; SI: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
> +; SI: buffer_load_sshort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
> define void @load_i16_sext_private(i32 addrspace(1)* %out) {
> entry:
>   %tmp0 = alloca i16
> @@ -35,7 +35,7 @@ entry:
> }
> 
> ; FUNC-LABEL: {{^}}load_i16_zext_private:
> -; SI: buffer_load_ushort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
> +; SI: buffer_load_ushort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
> define void @load_i16_zext_private(i32 addrspace(1)* %out) {
> entry:
>   %tmp0 = alloca i16
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll Fri Sep 30 20:37:15 2016
> @@ -207,12 +207,17 @@ define void @dynamic_insertelement_v3i16
> ; GCN: buffer_load_ushort v{{[0-9]+}}, off
> ; GCN: buffer_load_ushort v{{[0-9]+}}, off
> 
> -; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:6
> -; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4
> -; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
> -; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> +; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
> +; GCN: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
> +
> +; GCN-DAG: buffer_store_short v{{[0-9]+}}, [[BASE_FI]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:6
> +; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
> +; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
> +; GCN-DAG: buffer_store_short v{{[0-9]+}}, [[BASE_FI]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> ; GCN: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> 
> +; GCN: s_waitcnt
> +
> ; GCN: buffer_load_ushort
> ; GCN: buffer_load_ushort
> ; GCN: buffer_load_ushort
> @@ -229,7 +234,7 @@ define void @dynamic_insertelement_v4i16
> ; GCN: buffer_load_ubyte v{{[0-9]+}}, off
> ; GCN: buffer_load_ubyte v{{[0-9]+}}, off
> 
> -; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1
> +; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> 
> ; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> @@ -250,7 +255,7 @@ define void @dynamic_insertelement_v2i8(
> ; GCN: buffer_load_ubyte v{{[0-9]+}}, off
> 
> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
> -; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1
> +; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> 
> ; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> @@ -274,8 +279,8 @@ define void @dynamic_insertelement_v3i8(
> ; GCN: buffer_load_ubyte v{{[0-9]+}}, off
> 
> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:3
> -; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
> -; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1
> +; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
> +; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> 
> ; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
> @@ -390,8 +395,8 @@ define void @dynamic_insertelement_v3i64
> 
> ; Stack store
> 
> -; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
> -; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
> +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}
> +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}
> 
> ; Write element
> ; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
> @@ -416,8 +421,8 @@ define void @dynamic_insertelement_v4f64
> ; GCN: SCRATCH_RSRC_DWORD
> 
> ; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
> -; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
> -; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:32{{$}}
> +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}
> +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}
> ; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:48{{$}}
> 
> ; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll Fri Sep 30 20:37:15 2016
> @@ -6,8 +6,14 @@
> ; from https://bugs.freedesktop.org/show_bug.cgi?id=96602
> ;
> ; CHECK-LABEL: {{^}}main:
> -; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
> -; CHECK-DAG: v_mov_b32_e32 [[ZERO_BASE_FI:v[0-9]+]], 0{{$}}
> +
> +; FIXME: add 0?
> +; CHECK-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x138
> +; CHECK-DAG: v_add_i32_e64 [[ADD_K0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K0]], 0
> +
> +; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
> +; CHECK-DAG: buffer_store_dword {{v[0-9]+}}, [[ADD_K0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
> +
> ; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
> ; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
> 
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll Fri Sep 30 20:37:15 2016
> @@ -15,7 +15,7 @@
> ; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> 
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8
> +; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24
> 
> @@ -24,10 +24,10 @@
> 
> 
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}
> @@ -60,7 +60,7 @@ entry:
> ; HSA-ELT4: private_element_size = 1
> 
> ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> -; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16
> +; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
> ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:32
> ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:48
> 
> @@ -69,10 +69,10 @@ entry:
> 
> 
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8
> -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16
> +; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
> +; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24
> -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:32
> +; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:40
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:48
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:56
> @@ -82,14 +82,14 @@ entry:
> 
> 
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:12{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:32{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:32{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:36{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:40{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:44{{$}}
> @@ -137,7 +137,7 @@ entry:
> 
> 
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
> 
> @@ -173,7 +173,7 @@ entry:
> 
> 
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
> 
> @@ -207,7 +207,7 @@ entry:
> ; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> 
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> -; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8
> +; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16
> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24
> 
> @@ -216,10 +216,10 @@ entry:
> 
> 
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
> -; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}
> +; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}
> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll Fri Sep 30 20:37:15 2016
> @@ -9,9 +9,8 @@
> ; should be able to reuse the same register for each scratch buffer access.
> 
> ; GCN-LABEL: {{^}}legal_offset_fi:
> -; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
> -; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
> -; GCN: v_mov_b32_e32 [[OFFSET]], 0x8000
> +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+$}}
> +; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000
> ; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
> 
> define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
> @@ -97,7 +96,7 @@ entry:
> }
> 
> ; GCN-LABEL: {{^}}pos_vaddr_offset:
> -; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16
> +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:16
> define void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
> entry:
>   %array = alloca [8192 x i32]
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll Fri Sep 30 20:37:15 2016
> @@ -29,10 +29,10 @@
> 
> ; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
> 
> -; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
> -; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
> -; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
> -; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
> +; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
> +; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
> +; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
> +; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
> 
> ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
> ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
> 
> Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=282999&r1=282998&r2=282999&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Fri Sep 30 20:37:15 2016
> @@ -395,7 +395,7 @@ break:
> ; CHECK: s_and_b64 exec, exec, [[LIVE]]
> ; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0
> ; CHECK: s_wqm_b64 exec, exec
> -; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
> +; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+$}}
> ; CHECK: s_and_b64 exec, exec, [[LIVE]]
> ; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen
> ; CHECK: s_wqm_b64 exec, exec
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160930/79d5f865/attachment.html>


More information about the llvm-commits mailing list