[llvm] ab174c5 - [AMDGPU] Add more tests for buffer intrinsics

Piotr Sobczak via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 23 05:45:54 PST 2023


Author: Piotr Sobczak
Date: 2023-02-23T14:39:12+01:00
New Revision: ab174c57f4177fc08ca5d25395114220f55764f1

URL: https://github.com/llvm/llvm-project/commit/ab174c57f4177fc08ca5d25395114220f55764f1
DIFF: https://github.com/llvm/llvm-project/commit/ab174c57f4177fc08ca5d25395114220f55764f1.diff

LOG: [AMDGPU] Add more tests for buffer intrinsics

Add more tests for buffer intrinsics with large voffsets.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
index e6aa6a44bf39..0ef231933844 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
@@ -172,6 +172,134 @@ main_body:
   ret <4 x float> %data
 }
 
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_12bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_12bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 4092, i32 0, i32 0) ; constant offset 4092 (0xffc): every target's checks expect no register offset ("off") and the whole value in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_13bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_13bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_13bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 8188, i32 0, i32 0) ; constant offset 8188 = 0x1000 + 4092: the checks expect 0x1000 moved into v0 (offen) and 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_16bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_16bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_16bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0xf000
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 65532, i32 0, i32 0) ; constant offset 65532 = 0xf000 + 4092: the checks expect 0xf000 moved into v0 (offen) and 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_23bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_23bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_23bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 8388604, i32 0, i32 0) ; constant offset 8388604 = 0x7ff000 + 4092: the checks expect 0x7ff000 moved into v0 (offen) and 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_24bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_24bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_24bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0xfff000
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 16777212, i32 0, i32 0) ; constant offset 16777212 = 0xfff000 + 4092: the checks expect 0xfff000 moved into v0 (offen) and 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+
 define amdgpu_ps float @buffer_load_x1(<4 x i32> inreg %rsrc, i32 %ofs) {
 ; PREGFX10-LABEL: buffer_load_x1:
 ; PREGFX10:       ; %bb.0: ; %main_body
@@ -238,7 +366,6 @@ main_body:
   ret <4 x float> %data
 }
 
-
 define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, ptr addrspace(3) %lds) {
 ; GFX10-LABEL: buffer_load_mmo:
 ; GFX10:       ; %bb.0: ; %entry

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
index c1d08a1fc97c..4705021ee9f9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
@@ -72,6 +72,133 @@ main_body:
     ret <4 x float> %vdata.f
 }
 
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_12bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_12bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 4092, i32 0, i32 63, i32 0) ; constant offset 4092 (0xffc): every target's checks expect no register offset ("off") and the whole value in the immediate offset field
+  ret <4 x float> %data ; NOTE(review): this test's name lacks the "t" prefix used by the sibling tbuffer_load_voffset_large_* tests in this file - confirm whether that is intentional
+}
+
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8188, i32 0, i32 63, i32 0) ; constant offset 8188 = 0x1000 + 4092: the checks expect 0x1000 moved into v0 (offen) and 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0xf000
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 65532, i32 0, i32 63, i32 0) ; constant offset 65532 = 0xf000 + 4092: the checks expect 0xf000 moved into v0 (offen) and 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7ff000
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8388604, i32 0, i32 63, i32 0) ; constant offset 8388604 = 0x7ff000 + 4092: the checks expect 0x7ff000 moved into v0 (offen) and 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0xfff000
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 16777212, i32 0, i32 63, i32 0) ; constant offset 16777212 = 0xfff000 + 4092: the checks expect 0xfff000 moved into v0 (offen) and 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
 define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
 ; PREGFX10-LABEL: tbuffer_load_immoffs_large:
 ; PREGFX10:       ; %bb.0:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
index 6e995b0c8234..b7ef3d376d39 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
@@ -151,6 +151,123 @@ main_body:
   ret void
 }
 
+define amdgpu_ps void @buffer_store_voffset_large_12bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_12bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_12bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_12bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 4092, i32 0, i32 63, i32 0) ; constant offset 4092 (0xffc): every target's checks expect no register offset ("off") and the whole value in the immediate offset field
+  ret void
+}
+
+define amdgpu_ps void @buffer_store_voffset_large_13bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_13bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v4, 0x1000
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_13bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0x1000
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_13bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v4, 0x1000
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 8188, i32 0, i32 63, i32 0) ; constant offset 8188 = 0x1000 + 4092: the checks expect 0x1000 moved into v4 (offen) and 4092 left in the immediate offset field
+  ret void
+}
+
+define amdgpu_ps void @buffer_store_voffset_large_16bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_16bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v4, 0xf000
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_16bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0xf000
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_16bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v4, 0xf000
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 65532, i32 0, i32 63, i32 0) ; constant offset 65532 = 0xf000 + 4092: the checks expect 0xf000 moved into v4 (offen) and 4092 left in the immediate offset field
+  ret void
+}
+
+define amdgpu_ps void @buffer_store_voffset_large_23bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_23bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v4, 0x7ff000
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_23bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0x7ff000
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_23bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v4, 0x7ff000
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 8388604, i32 0, i32 63, i32 0) ; constant offset 8388604 = 0x7ff000 + 4092: the checks expect 0x7ff000 moved into v4 (offen) and 4092 left in the immediate offset field
+  ret void
+}
+
+define amdgpu_ps void @buffer_store_voffset_large_24bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_24bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v4, 0xfff000
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_24bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0xfff000
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_24bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v4, 0xfff000
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 16777212, i32 0, i32 63, i32 0) ; constant offset 16777212 = 0xfff000 + 4092: the checks expect 0xfff000 moved into v4 (offen) and 4092 left in the immediate offset field
+  ret void
+}
+
 declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
 declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
 declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
index cda6de4840aa..91557d75b75e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
@@ -135,6 +135,160 @@ main_body:
   ret <4 x float> %data
 }
 
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_12bit:
+; GFX6:       ; %bb.0: ; %main_body
+; GFX6-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
+; GFX8PLUS:       ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
+; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
+; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_12bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 0, i32 0) ; constant offset 4092 (0xffc): the checks expect v0 = 0 with idxen only and the whole 4092 in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_13bit:
+; GFX6:       ; %bb.0: ; %main_body
+; GFX6-NEXT:    s_mov_b32 s4, 0
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0x1000
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
+; GFX8PLUS:       ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
+; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x1000
+; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_13bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 8188, i32 0, i32 0) ; constant offset 8188 = 0x1000 + 4092: the checks expect v0 = 0 and v1 = 0x1000 (idxen offen) with 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_16bit:
+; GFX6:       ; %bb.0: ; %main_body
+; GFX6-NEXT:    s_mov_b32 s4, 0
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0xf000
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
+; GFX8PLUS:       ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
+; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xf000
+; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_16bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 65532, i32 0, i32 0) ; constant offset 65532 = 0xf000 + 4092: the checks expect v0 = 0 and v1 = 0xf000 (idxen offen) with 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_23bit:
+; GFX6:       ; %bb.0: ; %main_body
+; GFX6-NEXT:    s_mov_b32 s4, 0
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0x7ff000
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
+; GFX8PLUS:       ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
+; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x7ff000
+; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_23bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 8388604, i32 0, i32 0) ; constant offset 8388604 = 0x7ff000 + 4092: the checks expect v0 = 0 and v1 = 0x7ff000 (idxen offen) with 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_24bit:
+; GFX6:       ; %bb.0: ; %main_body
+; GFX6-NEXT:    s_mov_b32 s4, 0
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0xfff000
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
+; GFX8PLUS:       ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
+; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xfff000
+; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_24bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 0) ; constant offset 16777212 = 0xfff000 + 4092: the checks expect v0 = 0 and v1 = 0xfff000 (idxen offen) with 4092 left in the immediate offset field
+  ret <4 x float> %data
+}
+
 define amdgpu_ps <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) {
 ; GFX6-LABEL: buffer_load_idx:
 ; GFX6:       ; %bb.0: ; %main_body

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
index f38e7908b9af..0b4e11a54965 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
@@ -286,6 +286,160 @@ define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
     ret float %vdata.f
 }
 
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_12bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_12bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 0, i32 63, i32 0)
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    s_mov_b32 s4, 0
+; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0x1000
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0x1000
+; GFX10-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8188, i32 0, i32 63, i32 0)
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    s_mov_b32 s4, 0
+; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0xf000
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0xf000
+; GFX10-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 65532, i32 0, i32 63, i32 0)
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    s_mov_b32 s4, 0
+; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0x7ff000
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7ff000
+; GFX10-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8388604, i32 0, i32 63, i32 0)
+  ret <4 x float> %data
+}
+
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    s_mov_b32 s4, 0
+; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0xfff000
+; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
+; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0xfff000
+; GFX10-NEXT:    v_mov_b32_e32 v0, s4
+; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
+main_body:
+  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 63, i32 0)
+  ret <4 x float> %data
+}
+
 declare i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32, i32)
 declare <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32, i32)
 declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
index 885cb470a64f..44870bf8c2e3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
@@ -305,6 +305,188 @@ main_body:
   ret void
 }
 
+define amdgpu_ps void @buffer_store_voffset_large_12bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_12bit:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v4, 0
+; VERDE-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
+; VERDE-NEXT:    s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_12bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    v_mov_b32_e32 v4, 0
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_12bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_12bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_mov_b32_e32 v4, 0
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 4092, i32 0, i32 63, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @buffer_store_voffset_large_13bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_13bit:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b32 s4, 0
+; VERDE-NEXT:    v_mov_b32_e32 v5, 0x1000
+; VERDE-NEXT:    v_mov_b32_e32 v4, s4
+; VERDE-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; VERDE-NEXT:    s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_13bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    s_mov_b32 s4, 0
+; PREGFX10-NEXT:    v_mov_b32_e32 v5, 0x1000
+; PREGFX10-NEXT:    v_mov_b32_e32 v4, s4
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_13bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v5, 0x1000
+; GFX10-NEXT:    v_mov_b32_e32 v4, s4
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_13bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v5, 0x1000 :: v_dual_mov_b32 v4, s4
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 8188, i32 0, i32 63, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @buffer_store_voffset_large_16bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_16bit:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b32 s4, 0
+; VERDE-NEXT:    v_mov_b32_e32 v5, 0xf000
+; VERDE-NEXT:    v_mov_b32_e32 v4, s4
+; VERDE-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; VERDE-NEXT:    s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_16bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    s_mov_b32 s4, 0
+; PREGFX10-NEXT:    v_mov_b32_e32 v5, 0xf000
+; PREGFX10-NEXT:    v_mov_b32_e32 v4, s4
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_16bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v5, 0xf000
+; GFX10-NEXT:    v_mov_b32_e32 v4, s4
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_16bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v5, 0xf000 :: v_dual_mov_b32 v4, s4
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 65532, i32 0, i32 63, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @buffer_store_voffset_large_23bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_23bit:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b32 s4, 0
+; VERDE-NEXT:    v_mov_b32_e32 v5, 0x7ff000
+; VERDE-NEXT:    v_mov_b32_e32 v4, s4
+; VERDE-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; VERDE-NEXT:    s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_23bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    s_mov_b32 s4, 0
+; PREGFX10-NEXT:    v_mov_b32_e32 v5, 0x7ff000
+; PREGFX10-NEXT:    v_mov_b32_e32 v4, s4
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_23bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v5, 0x7ff000
+; GFX10-NEXT:    v_mov_b32_e32 v4, s4
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_23bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v5, 0x7ff000 :: v_dual_mov_b32 v4, s4
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 8388604, i32 0, i32 63, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @buffer_store_voffset_large_24bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_24bit:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b32 s4, 0
+; VERDE-NEXT:    v_mov_b32_e32 v5, 0xfff000
+; VERDE-NEXT:    v_mov_b32_e32 v4, s4
+; VERDE-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; VERDE-NEXT:    s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_24bit:
+; PREGFX10:       ; %bb.0: ; %main_body
+; PREGFX10-NEXT:    s_mov_b32 s4, 0
+; PREGFX10-NEXT:    v_mov_b32_e32 v5, 0xfff000
+; PREGFX10-NEXT:    v_mov_b32_e32 v4, s4
+; PREGFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT:    s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_24bit:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v5, 0xfff000
+; GFX10-NEXT:    v_mov_b32_e32 v4, s4
+; GFX10-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_24bit:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_mov_b32 s4, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v5, 0xfff000 :: v_dual_mov_b32 v4, s4
+; GFX11-NEXT:    tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 16777212, i32 0, i32 63, i32 0)
+  ret void
+}
+
 declare void @llvm.amdgcn.struct.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32) #0
 declare void @llvm.amdgcn.struct.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0
 declare void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0


        


More information about the llvm-commits mailing list