[llvm] 5281f5c - [AMDGPU] Add GFX9,GFX10,GFX11 checks for llvm.amdgcn.s.buffer.load

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 6 10:22:40 PST 2023


Author: Jay Foad
Date: 2023-03-06T18:19:50Z
New Revision: 5281f5c1e6dac83935748a43bbc90de8e5397391

URL: https://github.com/llvm/llvm-project/commit/5281f5c1e6dac83935748a43bbc90de8e5397391
DIFF: https://github.com/llvm/llvm-project/commit/5281f5c1e6dac83935748a43bbc90de8e5397391.diff

LOG: [AMDGPU] Add GFX9,GFX10,GFX11 checks for llvm.amdgcn.s.buffer.load

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
index 5e450a645f0e..b787aa364f1c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
@@ -1,7 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678,GFX67,GFX6
-; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678,GFX67,GFX78,GFX7
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678,GFX78,GFX8
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX678,GFX67,GFX6
+; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX67,GFX78,GFX7
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX8910,GFX78,GFX89,GFX8
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX789,GFX8910,GFX89,GFX910,GFX9
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX78910,GFX8910,GFX910,GFX10
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11
 
 define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
 ; GFX67-LABEL: s_buffer_load_imm:
@@ -12,13 +15,21 @@ define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
 ; GFX67-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
 ; GFX67-NEXT:    s_endpgm
 ;
-; GFX8-LABEL: s_buffer_load_imm:
-; GFX8:       ; %bb.0: ; %main_body
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0x4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
-; GFX8-NEXT:    s_endpgm
+; GFX8910-LABEL: s_buffer_load_imm:
+; GFX8910:       ; %bb.0: ; %main_body
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8910-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
+; GFX8910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_load_imm:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    exp mrt0 v0, v0, v0, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
   %bitcast = bitcast i32 %load to float
@@ -27,13 +38,21 @@ main_body:
 }
 
 define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
-; GFX678-LABEL: s_buffer_load_index:
-; GFX678:       ; %bb.0: ; %main_body
-; GFX678-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX678-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX678-NEXT:    v_mov_b32_e32 v0, s0
-; GFX678-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
-; GFX678-NEXT:    s_endpgm
+; GFX678910-LABEL: s_buffer_load_index:
+; GFX678910:       ; %bb.0: ; %main_body
+; GFX678910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX678910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX678910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX678910-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
+; GFX678910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_load_index:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    exp mrt0 v0, v0, v0, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
   %bitcast = bitcast i32 %load to float
@@ -42,12 +61,19 @@ main_body:
 }
 
 define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) {
-; GFX678-LABEL: s_buffer_load_index_divergent:
-; GFX678:       ; %bb.0: ; %main_body
-; GFX678-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
-; GFX678-NEXT:    s_waitcnt vmcnt(0)
-; GFX678-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
-; GFX678-NEXT:    s_endpgm
+; GFX678910-LABEL: s_buffer_load_index_divergent:
+; GFX678910:       ; %bb.0: ; %main_body
+; GFX678910-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX678910-NEXT:    s_waitcnt vmcnt(0)
+; GFX678910-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
+; GFX678910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_load_index_divergent:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    exp mrt0 v0, v0, v0, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
   %bitcast = bitcast i32 %load to float
@@ -65,14 +91,23 @@ define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
 ; GFX67-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
 ; GFX67-NEXT:    s_endpgm
 ;
-; GFX8-LABEL: s_buffer_loadx2_imm:
-; GFX8:       ; %bb.0: ; %main_body
-; GFX8-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], 0x40
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
-; GFX8-NEXT:    s_endpgm
+; GFX8910-LABEL: s_buffer_loadx2_imm:
+; GFX8910:       ; %bb.0: ; %main_body
+; GFX8910-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], 0x40
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8910-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
+; GFX8910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_loadx2_imm:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], 0x40
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v1, s1
+; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
   %bitcast = bitcast <2 x i32> %load to <2 x float>
@@ -83,14 +118,23 @@ main_body:
 }
 
 define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
-; GFX678-LABEL: s_buffer_loadx2_index:
-; GFX678:       ; %bb.0: ; %main_body
-; GFX678-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], s4
-; GFX678-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX678-NEXT:    v_mov_b32_e32 v0, s0
-; GFX678-NEXT:    v_mov_b32_e32 v1, s1
-; GFX678-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
-; GFX678-NEXT:    s_endpgm
+; GFX678910-LABEL: s_buffer_loadx2_index:
+; GFX678910:       ; %bb.0: ; %main_body
+; GFX678910-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], s4
+; GFX678910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX678910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX678910-NEXT:    v_mov_b32_e32 v1, s1
+; GFX678910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
+; GFX678910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_loadx2_index:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v1, s1
+; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
   %bitcast = bitcast <2 x i32> %load to <2 x float>
@@ -101,12 +145,19 @@ main_body:
 }
 
 define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) {
-; GFX678-LABEL: s_buffer_loadx2_index_divergent:
-; GFX678:       ; %bb.0: ; %main_body
-; GFX678-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
-; GFX678-NEXT:    s_waitcnt vmcnt(0)
-; GFX678-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
-; GFX678-NEXT:    s_endpgm
+; GFX678910-LABEL: s_buffer_loadx2_index_divergent:
+; GFX678910:       ; %bb.0: ; %main_body
+; GFX678910-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
+; GFX678910-NEXT:    s_waitcnt vmcnt(0)
+; GFX678910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
+; GFX678910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_loadx2_index_divergent:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
   %bitcast = bitcast <2 x i32> %load to <2 x float>
@@ -127,15 +178,25 @@ define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
 ; GFX67-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
 ; GFX67-NEXT:    s_endpgm
 ;
-; GFX8-LABEL: s_buffer_loadx3_imm:
-; GFX8:       ; %bb.0: ; %main_body
-; GFX8-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
-; GFX8-NEXT:    s_endpgm
+; GFX8910-LABEL: s_buffer_loadx3_imm:
+; GFX8910:       ; %bb.0: ; %main_body
+; GFX8910-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8910-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8910-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8910-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
+; GFX8910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_loadx3_imm:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], 0x40
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v1, s1
+; GFX11-NEXT:    v_mov_b32_e32 v2, s2
+; GFX11-NEXT:    exp mrt0 v0, v1, v2, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0)
   %bitcast = bitcast <3 x i32> %load to <3 x float>
@@ -147,15 +208,25 @@ main_body:
 }
 
 define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) {
-; GFX678-LABEL: s_buffer_loadx3_index:
-; GFX678:       ; %bb.0: ; %main_body
-; GFX678-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], s4
-; GFX678-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX678-NEXT:    v_mov_b32_e32 v0, s0
-; GFX678-NEXT:    v_mov_b32_e32 v1, s1
-; GFX678-NEXT:    v_mov_b32_e32 v2, s2
-; GFX678-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
-; GFX678-NEXT:    s_endpgm
+; GFX678910-LABEL: s_buffer_loadx3_index:
+; GFX678910:       ; %bb.0: ; %main_body
+; GFX678910-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], s4
+; GFX678910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX678910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX678910-NEXT:    v_mov_b32_e32 v1, s1
+; GFX678910-NEXT:    v_mov_b32_e32 v2, s2
+; GFX678910-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
+; GFX678910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_loadx3_index:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v1, s1
+; GFX11-NEXT:    v_mov_b32_e32 v2, s2
+; GFX11-NEXT:    exp mrt0 v0, v1, v2, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
   %bitcast = bitcast <3 x i32> %load to <3 x float>
@@ -174,12 +245,19 @@ define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i3
 ; GFX6-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
 ; GFX6-NEXT:    s_endpgm
 ;
-; GFX78-LABEL: s_buffer_loadx3_index_divergent:
-; GFX78:       ; %bb.0: ; %main_body
-; GFX78-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[0:3], 0 offen
-; GFX78-NEXT:    s_waitcnt vmcnt(0)
-; GFX78-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
-; GFX78-NEXT:    s_endpgm
+; GFX78910-LABEL: s_buffer_loadx3_index_divergent:
+; GFX78910:       ; %bb.0: ; %main_body
+; GFX78910-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[0:3], 0 offen
+; GFX78910-NEXT:    s_waitcnt vmcnt(0)
+; GFX78910-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
+; GFX78910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_loadx3_index_divergent:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    exp mrt0 v0, v1, v2, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
   %bitcast = bitcast <3 x i32> %load to <3 x float>
@@ -202,16 +280,27 @@ define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
 ; GFX67-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
 ; GFX67-NEXT:    s_endpgm
 ;
-; GFX8-LABEL: s_buffer_loadx4_imm:
-; GFX8:       ; %bb.0: ; %main_body
-; GFX8-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0xc8
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
-; GFX8-NEXT:    s_endpgm
+; GFX8910-LABEL: s_buffer_loadx4_imm:
+; GFX8910:       ; %bb.0: ; %main_body
+; GFX8910-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0xc8
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8910-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8910-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8910-NEXT:    v_mov_b32_e32 v3, s3
+; GFX8910-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
+; GFX8910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_loadx4_imm:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], 0xc8
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v1, s1
+; GFX11-NEXT:    v_mov_b32_e32 v2, s2
+; GFX11-NEXT:    v_mov_b32_e32 v3, s3
+; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
   %bitcast = bitcast <4 x i32> %load to <4 x float>
@@ -224,16 +313,27 @@ main_body:
 }
 
 define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
-; GFX678-LABEL: s_buffer_loadx4_index:
-; GFX678:       ; %bb.0: ; %main_body
-; GFX678-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], s4
-; GFX678-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX678-NEXT:    v_mov_b32_e32 v0, s0
-; GFX678-NEXT:    v_mov_b32_e32 v1, s1
-; GFX678-NEXT:    v_mov_b32_e32 v2, s2
-; GFX678-NEXT:    v_mov_b32_e32 v3, s3
-; GFX678-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
-; GFX678-NEXT:    s_endpgm
+; GFX678910-LABEL: s_buffer_loadx4_index:
+; GFX678910:       ; %bb.0: ; %main_body
+; GFX678910-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], s4
+; GFX678910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX678910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX678910-NEXT:    v_mov_b32_e32 v1, s1
+; GFX678910-NEXT:    v_mov_b32_e32 v2, s2
+; GFX678910-NEXT:    v_mov_b32_e32 v3, s3
+; GFX678910-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
+; GFX678910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_loadx4_index:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v1, s1
+; GFX11-NEXT:    v_mov_b32_e32 v2, s2
+; GFX11-NEXT:    v_mov_b32_e32 v3, s3
+; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
   %bitcast = bitcast <4 x i32> %load to <4 x float>
@@ -246,12 +346,19 @@ main_body:
 }
 
 define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) {
-; GFX678-LABEL: s_buffer_loadx4_index_divergent:
-; GFX678:       ; %bb.0: ; %main_body
-; GFX678-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
-; GFX678-NEXT:    s_waitcnt vmcnt(0)
-; GFX678-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
-; GFX678-NEXT:    s_endpgm
+; GFX678910-LABEL: s_buffer_loadx4_index_divergent:
+; GFX678910:       ; %bb.0: ; %main_body
+; GFX678910-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
+; GFX678910-NEXT:    s_waitcnt vmcnt(0)
+; GFX678910-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
+; GFX678910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_loadx4_index_divergent:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
   %bitcast = bitcast <4 x i32> %load to <4 x float>
@@ -273,14 +380,23 @@ define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
 ; GFX67-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
 ; GFX67-NEXT:    s_endpgm
 ;
-; GFX8-LABEL: s_buffer_load_imm_mergex2:
-; GFX8:       ; %bb.0: ; %main_body
-; GFX8-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
-; GFX8-NEXT:    s_endpgm
+; GFX8910-LABEL: s_buffer_load_imm_mergex2:
+; GFX8910:       ; %bb.0: ; %main_body
+; GFX8910-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8910-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
+; GFX8910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_load_imm_mergex2:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], 0x4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v1, s1
+; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
   %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
@@ -302,16 +418,27 @@ define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
 ; GFX67-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
 ; GFX67-NEXT:    s_endpgm
 ;
-; GFX8-LABEL: s_buffer_load_imm_mergex4:
-; GFX8:       ; %bb.0: ; %main_body
-; GFX8-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
-; GFX8-NEXT:    s_endpgm
+; GFX8910-LABEL: s_buffer_load_imm_mergex4:
+; GFX8910:       ; %bb.0: ; %main_body
+; GFX8910-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8910-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8910-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8910-NEXT:    v_mov_b32_e32 v3, s3
+; GFX8910-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
+; GFX8910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_load_imm_mergex4:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], 0x8
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    v_mov_b32_e32 v1, s1
+; GFX11-NEXT:    v_mov_b32_e32 v2, s2
+; GFX11-NEXT:    v_mov_b32_e32 v3, s3
+; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
   %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
@@ -379,6 +506,54 @@ define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
 ; GFX8-NEXT:    s_endpgm
+;
+; GFX9-LABEL: s_buffer_load_index_across_bb:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_getpc_b64 s[4:5]
+; GFX9-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_store_dword v1, v0, s[4:5]
+; GFX9-NEXT:    v_or_b32_e32 v0, 8, v0
+; GFX9-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: s_buffer_load_index_across_bb:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_getpc_b64 s[4:5]
+; GFX10-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
+; GFX10-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
+; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
+; GFX10-NEXT:    v_or_b32_e32 v0, 8, v0
+; GFX10-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_load_index_across_bb:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    s_getpc_b64 s[4:5]
+; GFX11-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT:    v_or_b32_e32 v0, 8, v0
+; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    exp mrt0 v0, v0, v0, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %tmp = shl i32 %index, 4
   store i32 %tmp, ptr addrspace(1) @gv
@@ -393,13 +568,21 @@ bb1:                                              ; preds = %main_body
 }
 
 define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
-; GFX678-LABEL: s_buffer_load_index_across_bb_merged:
-; GFX678:       ; %bb.0: ; %main_body
-; GFX678-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
-; GFX678-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen offset:8
-; GFX678-NEXT:    s_waitcnt vmcnt(0)
-; GFX678-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
-; GFX678-NEXT:    s_endpgm
+; GFX678910-LABEL: s_buffer_load_index_across_bb_merged:
+; GFX678910:       ; %bb.0: ; %main_body
+; GFX678910-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX678910-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen offset:8
+; GFX678910-NEXT:    s_waitcnt vmcnt(0)
+; GFX678910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
+; GFX678910-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_buffer_load_index_across_bb_merged:
+; GFX11:       ; %bb.0: ; %main_body
+; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:8
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
+; GFX11-NEXT:    s_endpgm
 main_body:
   %tmp = shl i32 %index, 4
   br label %bb1
@@ -424,12 +607,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
-; GFX78-LABEL: s_buffer_load_imm_neg1:
-; GFX78:       ; %bb.0:
-; GFX78-NEXT:    s_mov_b32 s4, -1
-; GFX78-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX78-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX78-NEXT:    ; return to shader part epilog
+; GFX78910-LABEL: s_buffer_load_imm_neg1:
+; GFX78910:       ; %bb.0:
+; GFX78910-NEXT:    s_mov_b32 s4, -1
+; GFX78910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX78910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX78910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_neg1:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s4, -1
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
   ret i32 %load
 }
@@ -449,12 +639,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_neg4:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s4, -4
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_neg4:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_mov_b32 s4, -4
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_neg4:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s4, -4
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
   ret i32 %load
 }
@@ -474,12 +671,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_neg8:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s4, -8
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_neg8:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_mov_b32 s4, -8
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_neg8:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s4, -8
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
   ret i32 %load
 }
@@ -499,12 +703,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_bit31:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_brev_b32 s4, 1
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_bit31:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_brev_b32 s4, 1
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_bit31:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_brev_b32 s4, 1
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
   ret i32 %load
 }
@@ -524,12 +735,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_bit30:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s4, 2.0
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_bit30:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_mov_b32 s4, 2.0
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_bit30:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s4, 2.0
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
   ret i32 %load
 }
@@ -549,12 +767,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_bit29:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_brev_b32 s4, 4
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_bit29:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_brev_b32 s4, 4
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_bit29:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_brev_b32 s4, 4
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
   ret i32 %load
 }
@@ -574,12 +799,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_bit21:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s4, 0x200000
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_bit21:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_mov_b32 s4, 0x200000
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_bit21:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s4, 0x200000
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
   ret i32 %load
 }
@@ -599,12 +831,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_bit20:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s4, 0x100000
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_bit20:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_mov_b32 s4, 0x100000
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_bit20:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s4, 0x100000
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
   ret i32 %load
 }
@@ -624,12 +863,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_neg_bit20:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s4, 0xfff00000
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_neg_bit20:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_mov_b32 s4, 0xfff00000
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_neg_bit20:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s4, 0xfff00000
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32  -1048576, i32 0)
   ret i32 %load
 }
@@ -649,11 +895,17 @@ define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_bit19:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0x80000
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_bit19:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x80000
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_bit19:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x80000
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
   ret i32 %load
 }
@@ -673,12 +925,19 @@ define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_neg_bit19:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_mov_b32 s4, 0xfff80000
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_neg_bit19:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_mov_b32 s4, 0xfff80000
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], s4
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_neg_bit19:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_mov_b32 s4, 0xfff80000
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
   ret i32 %load
 }
@@ -699,11 +958,17 @@ define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_255:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0xff
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_255:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0xff
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_255:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0xff
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
   ret i32 %load
 }
@@ -715,11 +980,17 @@ define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
 ; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX67-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_256:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0x100
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_256:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x100
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_256:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x100
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
   ret i32 %load
 }
@@ -731,11 +1002,17 @@ define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
 ; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX67-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_1016:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3f8
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_1016:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3f8
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_1016:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x3f8
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
   ret i32 %load
 }
@@ -747,11 +1024,17 @@ define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
 ; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX67-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_1020:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3fc
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_1020:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3fc
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_1020:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x3fc
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
   ret i32 %load
 }
@@ -772,11 +1055,17 @@ define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_1021:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3fd
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_1021:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3fd
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_1021:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x3fd
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
   ret i32 %load
 }
@@ -796,11 +1085,17 @@ define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_1024:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0x400
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_1024:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x400
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_1024:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x400
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
   ret i32 %load
 }
@@ -821,11 +1116,17 @@ define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_1025:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0x401
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_1025:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x401
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_1025:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x401
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
   ret i32 %load
 }
@@ -845,11 +1146,17 @@ define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
-; GFX8-LABEL: s_buffer_load_imm_1028:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], 0x400
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    ; return to shader part epilog
+; GFX8910-LABEL: s_buffer_load_imm_1028:
+; GFX8910:       ; %bb.0:
+; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x400
+; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8910-NEXT:    ; return to shader part epilog
+;
+; GFX11-LABEL: s_buffer_load_imm_1028:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x400
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    ; return to shader part epilog
   %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
   ret i32 %load
 }
@@ -859,3 +1166,11 @@ declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
 declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
 declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
 declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX678: {{.*}}
+; GFX6789: {{.*}}
+; GFX78: {{.*}}
+; GFX789: {{.*}}
+; GFX89: {{.*}}
+; GFX910: {{.*}}


        


More information about the llvm-commits mailing list