[llvm] r309776 - AMDGPU: Don't place arguments in emergency stack slot

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 1 17:59:51 PDT 2017


Author: arsenm
Date: Tue Aug  1 17:59:51 2017
New Revision: 309776

URL: http://llvm.org/viewvc/llvm-project?rev=309776&view=rev
Log:
AMDGPU: Don't place arguments in emergency stack slot

When finding the fixed offsets for function arguments,
this needs to skip over the 4 bytes reserved for the
emergency stack slot.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll
    llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
    llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
    llvm/trunk/test/CodeGen/AMDGPU/function-args.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=309776&r1=309775&r2=309776&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Aug  1 17:59:51 2017
@@ -1345,6 +1345,12 @@ SDValue SITargetLowering::LowerFormalArg
   bool IsKernel = AMDGPU::isKernel(CallConv);
   bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
 
+  if (!IsEntryFunc) {
+    // 4 bytes are reserved at offset 0 for the emergency stack slot. Skip over
+    // this when allocating argument fixed offsets.
+    CCInfo.AllocateStack(4, 4);
+  }
+
   if (IsShader) {
     processShaderInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
 
@@ -1818,7 +1824,9 @@ SDValue SITargetLowering::LowerCall(Call
 
   // Stack pointer relative accesses are done by changing the offset SGPR. This
   // is just the VGPR offset component.
-  SDValue StackPtr = DAG.getConstant(0, DL, MVT::i32);
+
+  // The first 4 bytes are reserved for the callee's emergency stack slot.
+  SDValue StackPtr = DAG.getConstant(4, DL, MVT::i32);
 
   SmallVector<SDValue, 8> MemOpChains;
   MVT PtrVT = MVT::i32;

Modified: llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll?rev=309776&r1=309775&r2=309776&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll Tue Aug  1 17:59:51 2017
@@ -5,14 +5,14 @@
 
 ; GCN-LABEL: {{^}}void_func_byval_struct:
 ; GCN: s_mov_b32 s5, s32
-; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
+; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
 ; GCN-NOT: s32
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5{{$}}
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5 offset:4{{$}}
 ; GCN-NOT: s32
 
-; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
 ; GCN-NOT: s32
-; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:16{{$}}
+; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}}
 ; GCN-NOT: s32
 define void @void_func_byval_struct(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 {
 entry:
@@ -33,22 +33,22 @@ entry:
 ; GCN: buffer_store_dword v32
 ; GCN: v_writelane_b32
 
-; GCN-DAG: s_add_u32 s32, s32, 0x900{{$}}
+; GCN-DAG: s_add_u32 s32, s32, 0xa00{{$}}
 
-; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
+; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
 ; GCN: v_add_i32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
-; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s5{{$}}
+; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
 
-; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
 ; GCN: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
 
 ; GCN: s_swappc_b64
 
-; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:16{{$}}
+; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}
 
 ; GCN: v_readlane_b32
 ; GCN: buffer_load_dword v32,
-; GCN: s_sub_u32 s32, s32, 0x900{{$}}
+; GCN: s_sub_u32 s32, s32, 0xa00{{$}}
 ; GCN: s_setpc_b64
 define void  @void_func_byval_struct_non_leaf(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 {
 entry:
@@ -83,20 +83,20 @@ entry:
 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
 
 
-; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
-; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
-; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
-; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
+; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
+; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
+; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
 
 ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36
 
-; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
-; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
-; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
-; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
+; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
+; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
+; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
+; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
 
 ; GCN: s_swappc_b64
 ; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}}
@@ -140,20 +140,20 @@ entry:
 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
 
-; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
-; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
-; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
-; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
+; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
+; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
+; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
 
 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
 ; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
 ; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36
 
-; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
-; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
-; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
-; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28
+; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
+; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
+; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
+; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
 
 
 ; GCN: s_swappc_b64

Modified: llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll?rev=309776&r1=309775&r2=309776&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll Tue Aug  1 17:59:51 2017
@@ -400,7 +400,7 @@ define amdgpu_kernel void @test_call_ext
 ; GCN-DAG: buffer_load_dwordx4 v[24:27], off
 ; GCN-DAG: buffer_load_dwordx4 v[28:31], off
 
-; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SP_REG]]{{$}}
+; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SP_REG]] offset:4{{$}}
 ; GCN: s_waitcnt
 ; GCN-NEXT: s_swappc_b64
 ; GCN-NEXT: s_endpgm
@@ -452,15 +452,15 @@ define amdgpu_kernel void @test_call_ext
 ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8
 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12
 
-; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:4
-; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]]{{$}}
+; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8
+; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4
 
 
 ; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8
 ; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12
 
-; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:4
-; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]]{{$}}
+; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8
+; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4
 
 ; GCN-NEXT: s_swappc_b64
 ; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200
@@ -487,8 +487,8 @@ define amdgpu_kernel void @test_call_ext
 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12
 
 ; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200
-; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
-; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]]{{$}}
+; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8
+; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
 ; GCN-NEXT: s_swappc_b64
 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16
 ; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20

Modified: llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll?rev=309776&r1=309775&r2=309776&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll Tue Aug  1 17:59:51 2017
@@ -72,9 +72,10 @@ define void @func_load_private_arg_i32_p
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr:
 ; GCN: s_waitcnt
 ; GCN-NEXT: s_mov_b32 s5, s32
-; GCN-NEXT: s_sub_u32 [[SUB:s[0-9]+]], s5, s4
-; GCN-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6
-; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
+; GCN-NEXT: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s5, s4
+; GCN-NEXT: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
+; GCN-NEXT: v_add_i32_e64 [[ADD:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]]
+; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, [[ADD]]
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
 define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 }* byval %arg0) #0 {
@@ -101,11 +102,12 @@ define void @void_func_byval_struct_i8_i
 }
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block:
-; GCN: s_sub_u32 s6, s5, s4
-; GCN: v_lshr_b32_e64 v1, s6, 6
+; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s5, s4
+; GCN: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
+; GCN: v_add_i32_e64 [[ADD:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]]
 ; GCN: s_and_saveexec_b64
 
-; GCN: v_add_i32_e32 v0, vcc, 4, v1
+; GCN: v_add_i32_e32 v0, vcc, 4, [[ADD]]
 ; GCN: buffer_load_dword v1, v1, s[0:3], s4 offen offset:4
 ; GCN: ds_write_b32
 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 }* byval %arg0, i32 %arg2) #0 {

Modified: llvm/trunk/test/CodeGen/AMDGPU/function-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/function-args.ll?rev=309776&r1=309775&r2=309776&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/function-args.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/function-args.ll Tue Aug  1 17:59:51 2017
@@ -502,8 +502,8 @@ define void @void_func_struct_i8_i32({ i
 }
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32:
-; GCN-DAG: buffer_load_ubyte v[[ELT0:[0-9]+]], off, s[0:3], s5{{$}}
-; GCN-DAG: buffer_load_dword v[[ELT1:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: buffer_load_ubyte v[[ELT0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[ELT1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
 ; GCN-DAG: buffer_store_dword v[[ELT1]]
 ; GCN-DAG: buffer_store_byte v[[ELT0]]
 define void @void_func_byval_struct_i8_i32({ i8, i32 }* byval %arg0) #0 {
@@ -513,10 +513,10 @@ define void @void_func_byval_struct_i8_i
 }
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_x2:
-; GCN: buffer_load_ubyte v[[ELT0_0:[0-9]+]], off, s[0:3], s5{{$}}
-; GCN: buffer_load_dword v[[ELT1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN: buffer_load_ubyte v[[ELT0_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN: buffer_load_dword v[[ELT1_1:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN: buffer_load_ubyte v[[ELT0_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN: buffer_load_dword v[[ELT1_0:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; GCN: buffer_load_ubyte v[[ELT0_1:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN: buffer_load_dword v[[ELT1_1:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
 
 ; GCN: ds_write_b32 v0, v0
 ; GCN: s_setpc_b64
@@ -530,7 +530,7 @@ define void @void_func_byval_struct_i8_i
 }
 
 ; GCN-LABEL: {{^}}void_func_byval_i32_byval_i64:
-; GCN-DAG: buffer_load_dword v[[ARG0_LOAD:[0-9]+]], off, s[0:3], s5{{$}}
+; GCN-DAG: buffer_load_dword v[[ARG0_LOAD:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
 ; GCN-DAG: buffer_load_dword v[[ARG1_LOAD0:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
 ; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
 ; GCN-DAG: buffer_store_dword v[[ARG0_LOAD]], off
@@ -552,9 +552,9 @@ define void @void_func_byval_i32_byval_i
 ; GCN-DAG: buffer_store_dwordx4 v[20:23], off
 ; GCN-DAG: buffer_store_dwordx4 v[24:27], off
 ; GCN-DAG: buffer_store_dwordx4 v[28:31], off
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1:[0-9]+]], off, s[0:3], s5{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:4
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:8
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:8
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:12
 
 ; GCN: buffer_store_dword v[[LOAD_ARG1]]
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off
@@ -567,14 +567,14 @@ define void @void_func_v32i32_i32_i64(<3
 
 ; FIXME: Different ext load types on CI vs. VI
 ; GCN-LABEL: {{^}}void_func_v32i32_i1_i8_i16:
-; GCN-DAG: buffer_load_ubyte [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s5{{$}}
-; VI-DAG: buffer_load_ushort [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; VI-DAG: buffer_load_ushort [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; VI-DAG: buffer_load_ushort [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-
-; CI-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; CI-DAG: buffer_load_dword [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; CI-DAG: buffer_load_dword [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN-DAG: buffer_load_ubyte [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; VI-DAG: buffer_load_ushort [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; VI-DAG: buffer_load_ushort [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; VI-DAG: buffer_load_ushort [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+
+; CI-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; CI-DAG: buffer_load_dword [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; CI-DAG: buffer_load_dword [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
 
 ; GCN-DAG: v_and_b32_e32 [[TRUNC_ARG1_I1:v[0-9]+]], 1, [[LOAD_ARG1]]
 ; CI-DAG: v_cvt_f16_f32_e32 [[CVT_ARG4:v[0-9]+]], [[LOAD_ARG4]]
@@ -595,10 +595,10 @@ define void @void_func_v32i32_i1_i8_i16(
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v2i32_v2f32:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
 
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_1]]{{\]}}, off
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off
@@ -610,8 +610,8 @@ define void @void_func_v32i32_v2i32_v2f3
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v2i16_v2f16:
-; GFX9-DAG: buffer_load_dword [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s5{{$}}
-; GFX9-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GFX9-DAG: buffer_load_dword [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GFX9-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
 ; GFX9: buffer_store_dword [[LOAD_ARG1]], off
 ; GFX9: buffer_store_short [[LOAD_ARG2]], off
 define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 {
@@ -622,15 +622,15 @@ define void @void_func_v32i32_v2i16_v2f1
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v2i64_v2f64:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
 
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
@@ -642,15 +642,15 @@ define void @void_func_v32i32_v2i64_v2f6
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v4i32_v4f32:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
 
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
@@ -662,23 +662,23 @@ define void @void_func_v32i32_v4i32_v4f3
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v8i32_v8f32:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
-
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:36{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:40{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:44{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s5 offset:48{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s5 offset:52{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s5 offset:56{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s5 offset:60{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
+
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:36{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:40{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:44{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:48{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s5 offset:52{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s5 offset:56{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s5 offset:60{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s5 offset:64{{$}}
 
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_4]]:[[LOAD_ARG1_7]]{{\]}}, off
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
@@ -692,39 +692,39 @@ define void @void_func_v32i32_v8i32_v8f3
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v16i32_v16f32:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_8:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_9:[0-9]+]], off, s[0:3], s5 offset:36{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_10:[0-9]+]], off, s[0:3], s5 offset:40{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_11:[0-9]+]], off, s[0:3], s5 offset:44{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_12:[0-9]+]], off, s[0:3], s5 offset:48{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_13:[0-9]+]], off, s[0:3], s5 offset:52{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_14:[0-9]+]], off, s[0:3], s5 offset:56{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_15:[0-9]+]], off, s[0:3], s5 offset:60{{$}}
-
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:64{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:68{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:72{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:76{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s5 offset:80{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s5 offset:84{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s5 offset:88{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s5 offset:92{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_8:[0-9]+]], off, s[0:3], s5 offset:96{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_9:[0-9]+]], off, s[0:3], s5 offset:100{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_10:[0-9]+]], off, s[0:3], s5 offset:104{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_11:[0-9]+]], off, s[0:3], s5 offset:108{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_12:[0-9]+]], off, s[0:3], s5 offset:112{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_13:[0-9]+]], off, s[0:3], s5 offset:116{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_14:[0-9]+]], off, s[0:3], s5 offset:120{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_15:[0-9]+]], off, s[0:3], s5 offset:124{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_8:[0-9]+]], off, s[0:3], s5 offset:36{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_9:[0-9]+]], off, s[0:3], s5 offset:40{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_10:[0-9]+]], off, s[0:3], s5 offset:44{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_11:[0-9]+]], off, s[0:3], s5 offset:48{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_12:[0-9]+]], off, s[0:3], s5 offset:52{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_13:[0-9]+]], off, s[0:3], s5 offset:56{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_14:[0-9]+]], off, s[0:3], s5 offset:60{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_15:[0-9]+]], off, s[0:3], s5 offset:64{{$}}
+
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:68{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:72{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:76{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:80{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s5 offset:84{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s5 offset:88{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s5 offset:92{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s5 offset:96{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_8:[0-9]+]], off, s[0:3], s5 offset:100{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_9:[0-9]+]], off, s[0:3], s5 offset:104{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_10:[0-9]+]], off, s[0:3], s5 offset:108{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_11:[0-9]+]], off, s[0:3], s5 offset:112{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_12:[0-9]+]], off, s[0:3], s5 offset:116{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_13:[0-9]+]], off, s[0:3], s5 offset:120{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_14:[0-9]+]], off, s[0:3], s5 offset:124{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_15:[0-9]+]], off, s[0:3], s5 offset:128{{$}}
 define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef




More information about the llvm-commits mailing list