<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class="">Hi Matt,<div class=""><br class=""></div><div class="">Tests are not passing:</div><div class=""> <a href="http://lab.llvm.org:8011/builders/clang-x86_64-linux-selfhost-modules/builds/20038" class="">http://lab.llvm.org:8011/builders/clang-x86_64-linux-selfhost-modules/builds/20038</a></div> <a href="http://lab.llvm.org:8011/builders/clang-with-lto-ubuntu/builds/587" class="">http://lab.llvm.org:8011/builders/clang-with-lto-ubuntu/builds/587</a><div class=""><br class=""></div><div class="">I reverted in r283003.</div><div class=""><br class=""></div><div class="">— </div><div class="">Mehdi</div><div class=""><br class=""></div><div class=""><br class=""><div><blockquote type="cite" class=""><div class="">On Sep 30, 2016, at 6:37 PM, Matt Arsenault via llvm-commits &lt;<a href="mailto:llvm-commits@lists.llvm.org" class="">llvm-commits@lists.llvm.org</a>&gt; wrote:</div><br class="Apple-interchange-newline"><div class=""><div class="">Author: arsenm<br class="">Date: Fri Sep 30 20:37:15 2016<br class="">New Revision: 282999<br class=""><br class="">URL: <a href="http://llvm.org/viewvc/llvm-project?rev=282999&view=rev" class="">http://llvm.org/viewvc/llvm-project?rev=282999&view=rev</a><br class="">Log:<br class="">AMDGPU: Don't use offen if it is 0<br class=""><br class="">This removes many re-initializations of a base register to 0.<br class=""><br class="">Modified:<br class="">    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp<br class="">    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h<br class="">    llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll<br class="">    llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll<br class="">    llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll<br class="">    llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll<br class=""> 
   llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll<br class="">    llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll<br class="">    llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll<br class="">    llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll<br class="">    llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll<br class="">    llvm/trunk/test/CodeGen/AMDGPU/wqm.ll<br class=""><br class="">Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)<br class="">+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Fri Sep 30 20:37:15 2016<br class="">@@ -320,14 +320,82 @@ static unsigned getNumSubRegsForSpillOp(<br class="">   }<br class=""> }<br class=""><br class="">-void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,<br class="">-                                           unsigned LoadStoreOp,<br class="">-                                           const MachineOperand *SrcDst,<br class="">-                                           unsigned ScratchRsrcReg,<br class="">-                                           unsigned ScratchOffset,<br class="">-                                           int64_t Offset,<br class="">-                                           RegScavenger *RS) const {<br class="">+static int getOffsetMUBUFStore(unsigned Opc) {<br class="">+  switch (Opc) {<br class="">+  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:<br class="">+    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;<br class="">+  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:<br class="">+    
return AMDGPU::BUFFER_STORE_BYTE_OFFSET;<br class="">+  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:<br class="">+    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;<br class="">+  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:<br class="">+    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;<br class="">+  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:<br class="">+    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;<br class="">+  default:<br class="">+    return -1;<br class="">+  }<br class="">+}<br class=""><br class="">+static int getOffsetMUBUFLoad(unsigned Opc) {<br class="">+  switch (Opc) {<br class="">+  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:<br class="">+    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;<br class="">+  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:<br class="">+    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;<br class="">+  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:<br class="">+    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;<br class="">+  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:<br class="">+    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;<br class="">+  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:<br class="">+    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;<br class="">+  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:<br class="">+    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;<br class="">+  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:<br class="">+    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;<br class="">+  default:<br class="">+    return -1;<br class="">+  }<br class="">+}<br class="">+<br class="">+// This differs from buildSpillLoadStore by only scavenging a VGPR. 
It does not<br class="">+// need to handle the case where an SGPR may need to be spilled while spilling.<br class="">+static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,<br class="">+                                      MachineFrameInfo &MFI,<br class="">+                                      MachineBasicBlock::iterator MI,<br class="">+                                      int Index,<br class="">+                                      int64_t Offset) {<br class="">+  MachineBasicBlock *MBB = MI->getParent();<br class="">+  const DebugLoc &DL = MI->getDebugLoc();<br class="">+  bool IsStore = MI->mayStore();<br class="">+<br class="">+  unsigned Opc = MI->getOpcode();<br class="">+  int LoadStoreOp = IsStore ?<br class="">+    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);<br class="">+  if (LoadStoreOp == -1)<br class="">+    return false;<br class="">+<br class="">+  unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();<br class="">+<br class="">+  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))<br class="">+    .addReg(Reg, getDefRegState(!IsStore))<br class="">+    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))<br class="">+    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))<br class="">+    .addImm(Offset)<br class="">+    .addImm(0) // glc<br class="">+    .addImm(0) // slc<br class="">+    .addImm(0) // tfe<br class="">+    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());<br class="">+  return true;<br class="">+}<br class="">+<br class="">+void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,<br class="">+                                         unsigned LoadStoreOp,<br class="">+                                         const MachineOperand *SrcDst,<br class="">+                                         unsigned ScratchRsrcReg,<br class="">+                                         unsigned ScratchOffset,<br class="">+                                         
int64_t Offset,<br class="">+                                         RegScavenger *RS) const {<br class="">   unsigned Value = SrcDst->getReg();<br class="">   bool IsKill = SrcDst->isKill();<br class="">   MachineBasicBlock *MBB = MI->getParent();<br class="">@@ -574,7 +642,7 @@ void SIRegisterInfo::eliminateFrameIndex<br class="">     case AMDGPU::SI_SPILL_V96_SAVE:<br class="">     case AMDGPU::SI_SPILL_V64_SAVE:<br class="">     case AMDGPU::SI_SPILL_V32_SAVE:<br class="">-      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,<br class="">+      buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,<br class="">             TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),<br class="">             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),<br class="">             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),<br class="">@@ -589,7 +657,7 @@ void SIRegisterInfo::eliminateFrameIndex<br class="">     case AMDGPU::SI_SPILL_V128_RESTORE:<br class="">     case AMDGPU::SI_SPILL_V256_RESTORE:<br class="">     case AMDGPU::SI_SPILL_V512_RESTORE: {<br class="">-      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,<br class="">+      buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,<br class="">             TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),<br class="">             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),<br class="">             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),<br class="">@@ -600,6 +668,24 @@ void SIRegisterInfo::eliminateFrameIndex<br class="">     }<br class=""><br class="">     default: {<br class="">+      if (TII->isMUBUF(*MI)) {<br class="">+        // Disable offen so we don't need a 0 vgpr base.<br class="">+        assert(static_cast&lt;int&gt;(FIOperandNum) ==<br class="">+               AMDGPU::getNamedOperandIdx(MI->getOpcode(),<br class="">+                                          AMDGPU::OpName::vaddr));<br class="">+<br 
class="">+        int64_t Offset = FrameInfo.getObjectOffset(Index);<br class="">+        int64_t OldImm<br class="">+          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();<br class="">+        int64_t NewOffset = OldImm + Offset;<br class="">+<br class="">+        if (isUInt<12>(NewOffset) &&<br class="">+            buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {<br class="">+          MI->eraseFromParent();<br class="">+          break;<br class="">+        }<br class="">+      }<br class="">+<br class="">       int64_t Offset = FrameInfo.getObjectOffset(Index);<br class="">       FIOp.ChangeToImmediate(Offset);<br class="">       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {<br class=""><br class="">Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)<br class="">+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Fri Sep 30 20:37:15 2016<br class="">@@ -240,11 +240,11 @@ public:<br class="">   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;<br class=""><br class=""> private:<br class="">-  void buildScratchLoadStore(MachineBasicBlock::iterator MI,<br class="">-                             unsigned LoadStoreOp, const MachineOperand *SrcDst,<br class="">-                             unsigned ScratchRsrcReg, unsigned ScratchOffset,<br class="">-                             int64_t Offset,<br class="">-                             RegScavenger *RS) const;<br class="">+  void buildSpillLoadStore(MachineBasicBlock::iterator MI,<br class="">+                           
unsigned LoadStoreOp, const MachineOperand *SrcDst,<br class="">+                           unsigned ScratchRsrcReg, unsigned ScratchOffset,<br class="">+                           int64_t Offset,<br class="">+                           RegScavenger *RS) const;<br class=""> };<br class=""><br class=""> } // End namespace llvm<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu.private-memory.ll Fri Sep 30 20:37:15 2016<br class="">@@ -227,8 +227,8 @@ for.end:<br class=""><br class=""> ; R600: MOVA_INT<br class=""><br class="">-; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0<br class="">-; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:2 ; encoding: [0x02,0x10,0x68,0xe0<br class="">+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; encoding: [0x00,0x00,0x68,0xe0,<br class="">+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:2 ; encoding: [0x02,0x00,0x68,0xe0,<br class=""> ; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}<br class=""> define void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {<br class=""> entry:<br class="">@@ -248,8 +248,11 @@ entry:<br class=""><br class=""> ; R600: MOVA_INT<br class=""><br class="">-; 
SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0<br class="">-; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0<br class="">+; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; encoding:<br class="">+; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:1 ; encoding:<br class="">+<br class="">+; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0<br class="">+; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0<br class=""> define void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {<br class=""> entry:<br class="">   %0 = alloca [2 x i8]<br class="">@@ -262,14 +265,13 @@ entry:<br class="">   %5 = sext i8 %4 to i32<br class="">   store i32 %5, i32 addrspace(1)* %out<br class="">   ret void<br class="">-<br class=""> }<br class=""><br class=""> ; Test that two stack objects are not stored in the same register<br class=""> ; The second stack object should be in T3.X<br class=""> ; FUNC-LABEL: {{^}}no_overlap:<br class="">-; R600_CHECK: MOV<br class="">-; R600_CHECK: [[CHAN:[XYZW]]]+<br class="">+; R600-CHECK: MOV<br class="">+; R600-CHECK: [[CHAN:[XYZW]]]+<br class=""> ; R600-NOT: [[CHAN]]+<br class=""> ; SI: v_mov_b32_e32 v3<br class=""> define void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll?rev=282999&r1=282998&r2=282999&view=diff" 
class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll Fri Sep 30 20:37:15 2016<br class="">@@ -14,8 +14,7 @@ entry:<br class=""><br class=""> ; GCN-LABEL: {{^}}stored_fi_to_lds:<br class=""> ; GCN: s_load_dword [[LDSPTR:s[0-9]+]]<br class="">-; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}<br class="">-; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]]<br class="">+; GCN: buffer_store_dword v{{[0-9]+}}, off,<br class=""> ; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}<br class=""> ; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]<br class=""> ; GCN: ds_write_b32  [[VLDSPTR]], [[ZERO0]]<br class="">@@ -118,7 +117,7 @@ define void @stored_fi_to_fi() #0 {<br class=""> }<br class=""><br class=""> ; GCN-LABEL: {{^}}stored_fi_to_global:<br class="">-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen<br class="">+; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}<br class=""> ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}<br class=""> ; GCN: buffer_store_dword [[FI]]<br class=""> define void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {<br class="">@@ -152,18 +151,20 @@ define void @stored_fi_to_global_2_small<br class=""> }<br class=""><br class=""> ; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:<br class="">-; GCN: v_mov_b32_e32 [[VAL_0:v[0-9]+]], 0{{$}}<br class=""> ; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}<br class="">-; GCN: buffer_store_dword [[VAL_0]], [[BASE_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen<br class="">+; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}<br class=""><br class="">+; FIXME: Re-initialize<br 
class=""> ; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 0{{$}}<br class="">-; GCN: v_add_i32_e32 [[BASE_1_OFF_0:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]<br class=""><br class="">-; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}<br class="">-; GCN: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 56, [[BASE_0_1]]<br class="">-; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class="">+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}<br class="">+; GCN-DAG: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]<br class="">+<br class="">+<br class="">+; GCN: v_add_i32_e32 [[BASE_1_OFF_2:v[0-9]+]], vcc, 56, [[BASE_0_1]]<br class="">+; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class=""><br class="">-; GCN: buffer_store_dword [[BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}<br class="">+; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}<br class=""> define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {<br class="">   %tmp0 = alloca [4096 x i32]<br class="">   %tmp1 = alloca [4096 x i32]<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll Fri Sep 30 20:37:15 2016<br class="">@@ -126,8 +126,8 @@ done:<br class=""><br class=""> ; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:<br class=""> ; GCN: s_and_saveexec_b64<br 
class="">-; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}<br class="">-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}<br class="">+; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}<br class="">+; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}<br class=""> ; GCN: {{^}}BB4_2:<br class=""> define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {<br class=""> entry:<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll Fri Sep 30 20:37:15 2016<br class="">@@ -2,7 +2,7 @@<br class=""> ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s<br class=""><br class=""> ; FUNC-LABEL: {{^}}load_i8_sext_private:<br class="">-; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen<br class="">+; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}<br class=""> define void @load_i8_sext_private(i32 addrspace(1)* %out) {<br class=""> entry:<br class="">   %tmp0 = alloca i8<br class="">@@ -13,7 +13,7 @@ entry:<br class=""> }<br class=""><br class=""> ; FUNC-LABEL: {{^}}load_i8_zext_private:<br class="">-; SI: 
buffer_load_ubyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen<br class="">+; SI: buffer_load_ubyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}<br class=""> define void @load_i8_zext_private(i32 addrspace(1)* %out) {<br class=""> entry:<br class="">   %tmp0 = alloca i8<br class="">@@ -24,7 +24,7 @@ entry:<br class=""> }<br class=""><br class=""> ; FUNC-LABEL: {{^}}load_i16_sext_private:<br class="">-; SI: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen<br class="">+; SI: buffer_load_sshort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}<br class=""> define void @load_i16_sext_private(i32 addrspace(1)* %out) {<br class=""> entry:<br class="">   %tmp0 = alloca i16<br class="">@@ -35,7 +35,7 @@ entry:<br class=""> }<br class=""><br class=""> ; FUNC-LABEL: {{^}}load_i16_zext_private:<br class="">-; SI: buffer_load_ushort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen<br class="">+; SI: buffer_load_ushort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}<br class=""> define void @load_i16_zext_private(i32 addrspace(1)* %out) {<br class=""> entry:<br class="">   %tmp0 = alloca i16<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll Fri Sep 30 20:37:15 2016<br class="">@@ -207,12 +207,17 @@ define void @dynamic_insertelement_v3i16<br class=""> ; GCN: buffer_load_ushort v{{[0-9]+}}, off<br class=""> ; GCN: 
buffer_load_ushort v{{[0-9]+}}, off<br class=""><br class="">-; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:6<br class="">-; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4<br class="">-; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2<br class="">-; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class="">+; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}<br class="">+; GCN: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}<br class="">+<br class="">+; GCN-DAG: buffer_store_short v{{[0-9]+}}, [[BASE_FI]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:6<br class="">+; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4<br class="">+; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2<br class="">+; GCN-DAG: buffer_store_short v{{[0-9]+}}, [[BASE_FI]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class=""> ; GCN: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class=""><br class="">+; GCN: s_waitcnt<br class="">+<br class=""> ; GCN: buffer_load_ushort<br class=""> ; GCN: buffer_load_ushort<br class=""> ; GCN: buffer_load_ushort<br class="">@@ -229,7 +234,7 @@ define void @dynamic_insertelement_v4i16<br class=""> ; GCN: buffer_load_ubyte v{{[0-9]+}}, off<br class=""> ; GCN: buffer_load_ubyte v{{[0-9]+}}, off<br class=""><br class="">-; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1<br class="">+; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1<br class=""> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class=""><br class=""> ; GCN: buffer_store_byte v{{[0-9]+}}, 
v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class="">@@ -250,7 +255,7 @@ define void @dynamic_insertelement_v2i8(<br class=""> ; GCN: buffer_load_ubyte v{{[0-9]+}}, off<br class=""><br class=""> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2<br class="">-; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1<br class="">+; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1<br class=""> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class=""><br class=""> ; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class="">@@ -274,8 +279,8 @@ define void @dynamic_insertelement_v3i8(<br class=""> ; GCN: buffer_load_ubyte v{{[0-9]+}}, off<br class=""><br class=""> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:3<br class="">-; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2<br class="">-; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1<br class="">+; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2<br class="">+; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1<br class=""> ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class=""><br class=""> ; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}<br class="">@@ -390,8 +395,8 @@ define void @dynamic_insertelement_v3i64<br class=""><br class=""> ; Stack store<br class=""><br class="">-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}<br class="">-; 
GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}<br class="">+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}<br class="">+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}<br class=""><br class=""> ; Write element<br class=""> ; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}<br class="">@@ -416,8 +421,8 @@ define void @dynamic_insertelement_v4f64<br class=""> ; GCN: SCRATCH_RSRC_DWORD<br class=""><br class=""> ; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}<br class="">-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}<br class="">-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:32{{$}}<br class="">+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}<br class="">+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}<br class=""> ; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:48{{$}}<br class=""><br class=""> ; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br 
class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll Fri Sep 30 20:37:15 2016<br class="">@@ -6,8 +6,14 @@<br class=""> ; from <a href="https://bugs.freedesktop.org/show_bug.cgi?id=96602" class="">https://bugs.freedesktop.org/show_bug.cgi?id=96602</a><br class=""> ;<br class=""> ; CHECK-LABEL: {{^}}main:<br class="">-; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0<br class="">-; CHECK-DAG: v_mov_b32_e32 [[ZERO_BASE_FI:v[0-9]+]], 0{{$}}<br class="">+<br class="">+; FIXME: add 0?<br class="">+; CHECK-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x138<br class="">+; CHECK-DAG: v_add_i32_e64 [[ADD_K0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K0]], 0<br class="">+<br class="">+; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0<br class="">+; CHECK-DAG: buffer_store_dword {{v[0-9]+}}, [[ADD_K0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}<br class="">+<br class=""> ; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]<br class=""> ; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]<br class=""><br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/private-element-size.ll Fri Sep 30 20:37:15 2016<br class="">@@ -15,7 +15,7 @@<br class=""> ; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 
s[0:3], s9 offen{{$}}<br class=""><br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class="">-; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8<br class="">+; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8<br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16<br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24<br class=""><br class="">@@ -24,10 +24,10 @@<br class=""><br class=""><br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}<br class="">@@ -60,7 +60,7 @@ entry:<br class=""> ; HSA-ELT4: private_element_size = 1<br class=""><br class=""> ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class="">-; 
HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16<br class="">+; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16<br class=""> ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:32<br class=""> ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:48<br class=""><br class="">@@ -69,10 +69,10 @@ entry:<br class=""><br class=""><br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class="">-; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8<br class="">-; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16<br class="">+; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8<br class="">+; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16<br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24<br class="">-; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:32<br class="">+; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32<br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:40<br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:48<br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:56<br class="">@@ -82,14 +82,14 @@ entry:<br class=""><br class=""><br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}<br class="">-; 
HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:12{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:32{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:32{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:36{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:40{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:44{{$}}<br class="">@@ -137,7 +137,7 @@ entry:<br class=""><br class=""><br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword 
{{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}<br class=""><br class="">@@ -173,7 +173,7 @@ entry:<br class=""><br class=""><br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}<br class=""><br class="">@@ -207,7 +207,7 @@ entry:<br class=""> ; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class=""><br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class="">-; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8<br class="">+; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8<br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16<br class=""> ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24<br class=""><br class="">@@ -216,10 +216,10 @@ entry:<br class=""><br class=""><br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword 
{{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}<br class="">-; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}<br class="">+; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}<br class=""> ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/scratch-buffer.ll Fri Sep 30 20:37:15 2016<br class="">@@ -9,9 +9,8 @@<br class=""> ; should be able to reuse the same regiser for each scratch buffer access.<br class=""><br class=""> ; GCN-LABEL: {{^}}legal_offset_fi:<br class="">-; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}<br class="">-; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen<br class="">-; GCN: v_mov_b32_e32 [[OFFSET]], 0x8000<br class="">+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+$}}<br class="">+; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000<br class=""> ; GCN: buffer_store_dword v{{[0-9]+}}, 
[[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}<br class=""><br class=""> define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {<br class="">@@ -97,7 +96,7 @@ entry:<br class=""> }<br class=""><br class=""> ; GCN-LABEL: {{^}}pos_vaddr_offset:<br class="">-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16<br class="">+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:16<br class=""> define void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {<br class=""> entry:<br class="">   %array = alloca [8192 x i32]<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll Fri Sep 30 20:37:15 2016<br class="">@@ -29,10 +29,10 @@<br class=""><br class=""> ; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill<br class=""><br class="">-; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}<br class="">-; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}<br class="">-; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}<br class="">-; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen 
offset:{{[0-9]+}}<br class="">+; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}<br class="">+; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}<br class="">+; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}<br class="">+; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}<br class=""><br class=""> ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}<br class=""> ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=282999&r1=282998&r2=282999&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=282999&r1=282998&r2=282999&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Fri Sep 30 20:37:15 2016<br class="">@@ -395,7 +395,7 @@ break:<br class=""> ; CHECK: s_and_b64 exec, exec, [[LIVE]]<br class=""> ; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0<br class=""> ; CHECK: s_wqm_b64 exec, exec<br class="">-; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen<br class="">+; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+$}}<br class=""> ; CHECK: s_and_b64 exec, exec, [[LIVE]]<br class=""> ; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen<br class=""> ; CHECK: s_wqm_b64 exec, exec<br class=""><br class=""><br class="">_______________________________________________<br class="">llvm-commits mailing list<br class=""><a href="mailto:llvm-commits@lists.llvm.org" 
class="">llvm-commits@lists.llvm.org</a><br class="">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits<br class=""></div></div></blockquote></div><br class=""></div></body></html>