[llvm] 7842e17 - [AMDGPU] Fix large return values with amdgpu_gfx

Sebastian Neubauer via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 15 06:05:11 PDT 2021


Author: Sebastian Neubauer
Date: 2021-04-15T14:57:56+02:00
New Revision: 7842e1725e80863cb5462351afbc293cb3a19111

URL: https://github.com/llvm/llvm-project/commit/7842e1725e80863cb5462351afbc293cb3a19111
DIFF: https://github.com/llvm/llvm-project/commit/7842e1725e80863cb5462351afbc293cb3a19111.diff

LOG: [AMDGPU] Fix large return values with amdgpu_gfx

Returning in memory is not supported, so fall back to sret.
Also, extend i1 and i16 to i32. Otherwise, they would be passed through
memory.

Differential Revision: https://reviews.llvm.org/D100543

Added: 
    llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index f25019b167ca1..43cb4bfb4c5cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -38,6 +38,9 @@ def CC_SI_Gfx : CallingConv<[
 ]>;
 
 def RetCC_SI_Gfx : CallingConv<[
+  CCIfType<[i1], CCPromoteToType<i32>>,
+  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
+
   // 0-3 are reserved for the stack buffer descriptor
   // 32 is reserved for the stack pointer
   CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
@@ -68,8 +71,6 @@ def RetCC_SI_Gfx : CallingConv<[
     VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
     VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
   ]>>>,
-
-  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>
 ]>;
 
 def CC_SI_SHADER : CallingConv<[

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 7a8eb95fc6f63..f6f76aa36e275 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -216,13 +216,13 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
   ; GCN:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
   ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.0, align 16, addrspace 5)
+  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
   ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[LOAD]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; GCN:   S_SETPC_B64_return [[COPY2]]
+  ; GCN:   G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1)
+  ; GCN:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; GCN:   S_SETPC_B64_return [[COPY3]]
   %val = call amdgpu_gfx i1 @external_gfx_i1_func_void()
   store volatile i1 %val, i1 addrspace(1)* undef
   ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
new file mode 100644
index 0000000000000..2c6d7ada94261
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -0,0 +1,1284 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
+
+define amdgpu_gfx i1 @return_i1() #0 {
+; GFX9-LABEL: return_i1:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, 1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: return_i1:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  ret i1 1
+}
+
+define amdgpu_gfx void @call_i1() #0 {
+; GFX9-LABEL: call_i1:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_getpc_b64 s[6:7]
+; GFX9-NEXT:    s_add_u32 s6, s6, return_i1@gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s7, s7, return_i1@gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX9-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT:    s_setpc_b64 s[4:5]
+;
+; GFX10-LABEL: call_i1:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_getpc_b64 s[6:7]
+; GFX10-NEXT:    s_add_u32 s6, s6, return_i1@gotpcrel32@lo+4
+; GFX10-NEXT:    s_addc_u32 s7, s7, return_i1@gotpcrel32@hi+12
+; GFX10-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT:    s_setpc_b64 s[4:5]
+entry:
+  call amdgpu_gfx i1 @return_i1()
+  ret void
+}
+
+define amdgpu_gfx i16 @return_i16() #0 {
+; GFX9-LABEL: return_i16:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, 10
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: return_i16:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 10
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  ret i16 10
+}
+
+define amdgpu_gfx void @call_i16() #0 {
+; GFX9-LABEL: call_i16:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_getpc_b64 s[6:7]
+; GFX9-NEXT:    s_add_u32 s6, s6, return_i16@gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s7, s7, return_i16@gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX9-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT:    s_setpc_b64 s[4:5]
+;
+; GFX10-LABEL: call_i16:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_getpc_b64 s[6:7]
+; GFX10-NEXT:    s_add_u32 s6, s6, return_i16@gotpcrel32@lo+4
+; GFX10-NEXT:    s_addc_u32 s7, s7, return_i16@gotpcrel32@hi+12
+; GFX10-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT:    s_setpc_b64 s[4:5]
+entry:
+  call amdgpu_gfx i16 @return_i16()
+  ret void
+}
+
+define amdgpu_gfx <2 x i16> @return_2xi16() #0 {
+; GFX9-LABEL: return_2xi16:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: return_2xi16:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x20001
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  ret <2 x i16> <i16 1, i16 2>
+}
+
+define amdgpu_gfx void @call_2xi16() #0 {
+; GFX9-LABEL: call_2xi16:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_getpc_b64 s[6:7]
+; GFX9-NEXT:    s_add_u32 s6, s6, return_2xi16@gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s7, s7, return_2xi16@gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX9-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT:    s_setpc_b64 s[4:5]
+;
+; GFX10-LABEL: call_2xi16:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_getpc_b64 s[6:7]
+; GFX10-NEXT:    s_add_u32 s6, s6, return_2xi16@gotpcrel32@lo+4
+; GFX10-NEXT:    s_addc_u32 s7, s7, return_2xi16@gotpcrel32@hi+12
+; GFX10-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT:    s_setpc_b64 s[4:5]
+entry:
+  call amdgpu_gfx <2 x i16> @return_2xi16()
+  ret void
+}
+
+define amdgpu_gfx <3 x i16> @return_3xi16() #0 {
+; GFX9-LABEL: return_3xi16:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
+; GFX9-NEXT:    v_mov_b32_e32 v1, 3
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: return_3xi16:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x20001
+; GFX10-NEXT:    v_mov_b32_e32 v1, 3
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  ret <3 x i16> <i16 1, i16 2, i16 3>
+}
+
+define amdgpu_gfx void @call_3xi16() #0 {
+; GFX9-LABEL: call_3xi16:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_getpc_b64 s[6:7]
+; GFX9-NEXT:    s_add_u32 s6, s6, return_3xi16@gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s7, s7, return_3xi16@gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX9-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT:    s_setpc_b64 s[4:5]
+;
+; GFX10-LABEL: call_3xi16:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_getpc_b64 s[6:7]
+; GFX10-NEXT:    s_add_u32 s6, s6, return_3xi16@gotpcrel32@lo+4
+; GFX10-NEXT:    s_addc_u32 s7, s7, return_3xi16@gotpcrel32@hi+12
+; GFX10-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT:    s_setpc_b64 s[4:5]
+entry:
+  call amdgpu_gfx <3 x i16> @return_3xi16()
+  ret void
+}
+
+; Check that return values that do not fit in registers do not crash
+
+define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
+; GFX9-LABEL: return_512xi32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2028
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2024
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2020
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2016
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2012
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2008
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2004
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2000
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1996
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1992
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1988
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1984
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1980
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1976
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1972
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1968
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1964
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1960
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1956
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1952
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1948
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1944
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1940
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1936
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1932
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1928
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1924
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1920
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1916
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1912
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1908
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1904
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1900
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1896
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1892
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1888
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1884
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1880
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1876
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1872
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1868
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1864
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1860
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1856
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1852
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1848
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1844
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1840
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1836
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1832
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1828
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1824
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1820
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1816
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1812
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1808
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1804
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1800
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1796
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1792
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1788
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1784
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1780
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1776
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1772
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1768
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1764
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1760
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1756
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1752
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1748
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1744
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1740
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1736
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1732
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1728
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1724
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1720
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1716
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1712
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1708
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1704
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1700
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1696
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1692
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1688
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1684
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1680
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1676
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1672
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1668
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1664
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1660
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1656
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1652
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1648
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1644
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1640
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1636
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1632
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1628
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1624
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1620
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1616
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1612
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1608
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1604
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1600
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1596
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1592
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1588
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1584
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1580
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1576
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1572
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1568
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1564
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1560
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1556
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1552
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1548
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1544
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1540
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1536
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1532
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1528
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1524
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1520
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1516
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1512
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1508
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1504
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1500
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1496
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1492
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1488
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1484
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1480
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1476
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1472
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1468
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1464
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1460
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1456
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1452
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1448
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1444
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1440
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1436
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1432
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1428
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1424
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1420
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1416
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1412
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1408
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1404
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1400
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1396
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1392
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1388
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1384
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1380
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1376
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1372
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1368
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1364
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1360
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1356
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1352
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1348
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1344
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1340
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1336
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1332
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1328
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1324
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1320
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1316
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1312
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1308
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1304
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1300
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1296
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1292
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1288
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1284
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1280
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1276
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1272
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1268
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1264
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1260
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1256
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1252
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1248
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1244
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1240
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1236
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1232
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1228
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1224
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1220
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1216
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1212
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1208
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1204
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1200
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1196
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1192
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1188
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1184
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1180
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1176
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1172
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1168
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1164
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1160
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1156
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1152
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1148
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1144
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1140
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1136
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1132
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1128
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1124
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1120
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1116
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1112
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1108
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1104
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1100
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1096
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1092
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1088
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1084
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1080
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1076
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1072
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1068
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1064
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1060
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1056
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1052
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1048
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1044
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1008
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1004
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1000
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:996
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:992
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:988
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:984
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:980
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:976
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:972
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:968
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:964
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:960
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:956
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:952
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:948
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:944
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:940
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:936
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:932
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:928
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:924
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:920
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:916
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:912
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:908
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:904
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:900
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:896
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:892
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:888
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:884
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:880
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:876
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:872
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:868
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:864
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:860
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:856
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:852
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:848
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:844
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:840
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:836
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:832
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:828
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:824
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:820
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:816
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:812
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:808
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:804
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:800
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:796
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:792
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:788
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:784
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:780
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:776
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:772
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:768
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:764
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:760
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:756
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:752
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:748
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:744
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:740
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:736
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:732
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:728
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:724
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:720
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:716
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:712
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:708
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:704
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:700
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:696
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:692
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:688
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:684
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:680
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:676
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:672
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:668
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:664
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:660
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:656
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:652
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:648
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:644
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:640
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:636
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:632
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:628
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:624
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:620
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:616
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:612
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:608
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:604
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:600
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:596
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:592
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:588
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:584
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:580
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:576
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:572
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:568
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:564
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:560
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:556
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:552
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:548
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:544
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:540
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:536
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:532
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:528
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:524
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:520
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:516
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:512
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:508
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:504
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:500
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:496
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:492
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:488
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:484
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:480
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:476
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:472
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:468
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:464
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:460
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:456
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:452
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:448
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:444
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:440
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:436
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:432
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:428
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:424
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:420
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:416
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:412
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:408
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:404
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:400
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:396
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:392
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:388
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:384
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:380
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:376
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:372
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:368
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:364
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:360
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:356
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:352
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:348
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:344
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:340
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:336
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:332
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:328
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:324
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:320
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:316
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:312
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:308
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:304
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:300
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:296
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:292
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:288
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:284
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:280
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:276
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:272
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:268
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:264
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:260
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:256
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:252
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:248
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:244
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:236
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:232
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:228
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:224
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:220
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:216
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:212
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:208
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:204
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:200
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:196
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:192
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:188
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:184
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:180
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:176
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:172
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:168
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:164
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:160
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:156
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:152
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:148
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:144
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:140
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:136
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:132
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:128
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:124
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:120
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:116
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:108
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:104
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:100
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:96
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:92
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:88
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:84
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:80
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:76
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:72
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:68
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:64
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:52
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:48
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:44
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:40
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:36
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:32
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:28
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:24
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:20
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: return_512xi32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2028
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2024
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2020
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2016
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2012
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2008
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2004
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2000
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1996
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1992
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1988
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1984
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1980
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1976
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1972
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1968
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1964
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1960
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1956
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1952
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1948
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1944
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1940
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1936
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1932
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1928
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1924
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1920
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1916
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1912
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1908
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1904
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1900
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1896
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1892
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1888
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1884
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1880
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1876
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1872
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1868
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1864
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1860
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1856
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1852
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1848
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1844
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1840
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1836
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1832
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1828
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1824
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1820
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1816
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1812
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1808
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1804
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1800
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1796
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1792
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1788
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1784
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1780
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1776
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1772
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1768
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1764
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1760
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1756
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1752
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1748
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1744
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1740
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1736
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1732
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1728
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1724
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1720
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1716
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1712
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1708
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1704
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1700
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1696
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1692
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1688
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1684
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1680
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1676
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1672
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1668
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1664
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1660
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1656
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1652
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1648
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1644
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1640
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1636
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1632
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1628
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1624
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1620
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1616
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1612
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1608
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1604
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1600
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1596
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1592
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1588
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1584
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1580
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1576
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1572
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1568
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1564
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1560
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1556
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1552
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1548
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1544
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1540
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1536
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1532
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1528
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1524
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1520
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1516
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1512
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1508
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1504
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1500
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1496
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1492
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1488
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1484
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1480
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1476
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1472
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1468
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1464
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1460
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1456
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1452
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1448
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1444
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1440
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1436
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1432
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1428
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1424
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1420
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1416
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1412
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1408
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1404
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1400
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1396
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1392
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1388
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1384
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1380
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1376
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1372
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1368
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1364
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1360
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1356
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1352
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1348
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1344
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1340
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1336
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1332
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1328
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1324
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1320
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1316
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1312
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1308
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1304
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1300
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1296
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1292
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1288
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1284
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1280
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1276
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1272
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1268
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1264
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1260
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1256
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1252
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1248
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1244
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1240
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1236
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1232
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1228
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1224
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1220
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1216
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1212
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1208
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1204
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1200
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1196
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1192
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1188
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1184
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1180
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1176
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1172
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1168
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1164
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1160
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1156
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1152
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1148
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1144
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1140
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1136
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1132
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1128
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1124
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1120
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1116
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1112
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1108
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1104
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1100
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1096
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1092
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1088
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1084
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1080
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1076
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1072
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1068
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1064
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1060
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1056
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1052
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1048
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1044
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1008
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1004
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1000
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:996
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:992
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:988
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:984
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:980
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:976
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:972
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:968
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:964
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:960
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:956
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:952
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:948
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:944
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:940
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:936
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:932
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:928
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:924
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:920
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:916
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:912
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:908
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:904
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:900
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:896
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:892
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:888
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:884
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:880
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:876
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:872
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:868
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:864
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:860
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:856
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:852
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:848
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:844
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:840
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:836
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:832
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:828
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:824
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:820
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:816
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:812
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:808
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:804
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:800
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:796
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:792
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:788
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:784
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:780
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:776
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:772
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:768
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:764
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:760
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:756
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:752
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:748
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:744
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:740
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:736
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:732
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:728
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:724
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:720
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:716
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:712
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:708
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:704
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:700
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:696
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:692
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:688
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:684
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:680
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:676
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:672
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:668
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:664
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:660
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:656
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:652
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:648
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:644
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:640
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:636
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:632
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:628
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:624
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:620
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:616
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:612
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:608
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:604
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:600
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:596
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:592
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:588
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:584
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:580
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:576
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:572
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:568
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:564
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:560
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:556
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:552
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:548
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:544
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:540
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:536
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:532
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:528
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:524
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:520
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:516
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:512
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:508
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:504
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:500
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:496
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:492
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:488
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:484
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:480
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:476
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:472
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:468
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:464
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:460
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:456
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:452
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:448
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:444
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:440
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:436
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:432
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:428
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:424
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:420
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:416
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:412
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:408
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:404
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:400
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:396
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:392
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:388
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:384
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:380
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:376
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:372
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:368
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:364
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:360
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:356
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:352
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:348
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:344
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:340
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:336
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:332
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:328
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:324
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:320
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:316
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:312
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:308
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:304
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:300
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:296
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:292
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:288
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:284
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:280
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:276
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:272
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:268
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:264
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:260
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:256
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:252
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:248
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:244
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:236
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:232
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:228
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:224
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:220
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:216
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:212
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:208
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:204
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:200
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:196
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:192
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:188
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:184
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:180
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:176
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:172
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:168
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:164
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:160
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:156
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:152
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:148
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:144
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:140
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:136
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:132
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:128
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:124
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:120
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:116
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:108
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:104
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:100
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:96
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:92
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:88
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:84
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:80
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:76
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:72
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:68
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:64
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:52
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:48
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:44
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:40
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:36
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:32
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:28
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:24
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:20
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
+; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  ret <512 x i32> zeroinitializer
+}
+
+define amdgpu_gfx void @call_512xi32() #0 {
+; GFX9-LABEL: call_512xi32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s33
+; GFX9-NEXT:    s_add_u32 s33, s32, 0x1ffc0
+; GFX9-NEXT:    s_and_b32 s33, s33, 0xfffe0000
+; GFX9-NEXT:    s_add_u32 s32, s32, 0x60000
+; GFX9-NEXT:    s_getpc_b64 s[6:7]
+; GFX9-NEXT:    s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX9-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
+; GFX9-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT:    s_sub_u32 s32, s32, 0x60000
+; GFX9-NEXT:    s_mov_b32 s33, s8
+; GFX9-NEXT:    s_setpc_b64 s[4:5]
+;
+; GFX10-LABEL: call_512xi32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_mov_b32 s8, s33
+; GFX10-NEXT:    s_add_u32 s33, s32, 0xffe0
+; GFX10-NEXT:    s_mov_b64 s[4:5], s[30:31]
+; GFX10-NEXT:    s_and_b32 s33, s33, 0xffff0000
+; GFX10-NEXT:    s_add_u32 s32, s32, 0x30000
+; GFX10-NEXT:    s_getpc_b64 s[6:7]
+; GFX10-NEXT:    s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4
+; GFX10-NEXT:    s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12
+; GFX10-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
+; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT:    s_sub_u32 s32, s32, 0x30000
+; GFX10-NEXT:    s_mov_b32 s33, s8
+; GFX10-NEXT:    s_setpc_b64 s[4:5]
+entry:
+  call amdgpu_gfx <512 x i32> @return_512xi32()
+  ret void
+}
+
+attributes #0 = { nounwind }


        


More information about the llvm-commits mailing list