[llvm] e13d2ef - [AMDGPU] Add GlobalISel checks for flat scratch SVS addressing

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 20 04:16:06 PDT 2022


Author: Jay Foad
Date: 2022-04-20T12:06:39+01:00
New Revision: e13d2efed663fc231a32a663669fb05fabe10f83

URL: https://github.com/llvm/llvm-project/commit/e13d2efed663fc231a32a663669fb05fabe10f83
DIFF: https://github.com/llvm/llvm-project/commit/e13d2efed663fc231a32a663669fb05fabe10f83.diff

LOG: [AMDGPU] Add GlobalISel checks for flat scratch SVS addressing

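Note that GlobalISel does not yet use the SVS addressing mode for these
cases, because it assigns G_FRAME_INDEX to the VGPR bank; see the TODO
comment in AMDGPURegisterBankInfo::getInstrMapping. As a result, the new
GFX940-GISEL checks compute the full address with v_add_u32 and emit the
scratch stores with `off` as the scalar offset operand.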

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index 22bfd0b12096e..ca1ddea4cab2f 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefix=GFX940
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
 
 ; Test flat scratch SVS addressing mode with various combinations of alignment
 ; of soffset, voffset and inst_offset.
@@ -7,21 +8,39 @@
 declare i32 @llvm.amdgcn.workitem.id.x()
 
 define amdgpu_kernel void @soff1_voff1(i32 %soff) {
-; GFX940-LABEL: soff1_voff1:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX940-SDAG-LABEL: soff1_voff1:
+; GFX940-SDAG:       ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    s_endpgm
+;
+; GFX940-GISEL-LABEL: soff1_voff1:
+; GFX940-GISEL:       ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
+; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    s_endpgm
 bb:
   %soff1 = mul i32 %soff, 1
   %a = alloca i8, i32 64, align 4, addrspace(5)
@@ -39,22 +58,41 @@ bb:
 }
 
 define amdgpu_kernel void @soff1_voff2(i32 %soff) {
-; GFX940-LABEL: soff1_voff2:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    v_mul_u32_u24_e32 v0, 2, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX940-SDAG-LABEL: soff1_voff2:
+; GFX940-SDAG:       ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT:    v_mul_u32_u24_e32 v0, 2, v0
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    s_endpgm
+;
+; GFX940-GISEL-LABEL: soff1_voff2:
+; GFX940-GISEL:       ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    v_mul_u32_u24_e32 v0, 2, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    s_endpgm
 bb:
   %soff1 = mul i32 %soff, 1
   %a = alloca i8, i32 64, align 4, addrspace(5)
@@ -72,22 +110,41 @@ bb:
 }
 
 define amdgpu_kernel void @soff1_voff4(i32 %soff) {
-; GFX940-LABEL: soff1_voff4:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    v_mul_u32_u24_e32 v0, 4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX940-SDAG-LABEL: soff1_voff4:
+; GFX940-SDAG:       ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT:    v_mul_u32_u24_e32 v0, 4, v0
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    s_endpgm
+;
+; GFX940-GISEL-LABEL: soff1_voff4:
+; GFX940-GISEL:       ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    v_mul_u32_u24_e32 v0, 4, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    s_endpgm
 bb:
   %soff1 = mul i32 %soff, 1
   %a = alloca i8, i32 64, align 4, addrspace(5)
@@ -105,22 +162,41 @@ bb:
 }
 
 define amdgpu_kernel void @soff2_voff1(i32 %soff) {
-; GFX940-LABEL: soff2_voff1:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX940-SDAG-LABEL: soff2_voff1:
+; GFX940-SDAG:       ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    s_endpgm
+;
+; GFX940-GISEL-LABEL: soff2_voff1:
+; GFX940-GISEL:       ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
+; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    s_endpgm
 bb:
   %soff2 = mul i32 %soff, 2
   %a = alloca i8, i32 64, align 4, addrspace(5)
@@ -138,23 +214,43 @@ bb:
 }
 
 define amdgpu_kernel void @soff2_voff2(i32 %soff) {
-; GFX940-LABEL: soff2_voff2:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    v_mul_u32_u24_e32 v0, 2, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX940-SDAG-LABEL: soff2_voff2:
+; GFX940-SDAG:       ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT:    v_mul_u32_u24_e32 v0, 2, v0
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    s_endpgm
+;
+; GFX940-GISEL-LABEL: soff2_voff2:
+; GFX940-GISEL:       ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    v_mul_u32_u24_e32 v0, 2, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    s_endpgm
 bb:
   %soff2 = mul i32 %soff, 2
   %a = alloca i8, i32 64, align 4, addrspace(5)
@@ -172,23 +268,43 @@ bb:
 }
 
 define amdgpu_kernel void @soff2_voff4(i32 %soff) {
-; GFX940-LABEL: soff2_voff4:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    v_mul_u32_u24_e32 v0, 4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX940-SDAG-LABEL: soff2_voff4:
+; GFX940-SDAG:       ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT:    v_mul_u32_u24_e32 v0, 4, v0
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    s_endpgm
+;
+; GFX940-GISEL-LABEL: soff2_voff4:
+; GFX940-GISEL:       ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    v_mul_u32_u24_e32 v0, 4, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    s_endpgm
 bb:
   %soff2 = mul i32 %soff, 2
   %a = alloca i8, i32 64, align 4, addrspace(5)
@@ -206,22 +322,41 @@ bb:
 }
 
 define amdgpu_kernel void @soff4_voff1(i32 %soff) {
-; GFX940-LABEL: soff4_voff1:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX940-SDAG-LABEL: soff4_voff1:
+; GFX940-SDAG:       ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    s_endpgm
+;
+; GFX940-GISEL-LABEL: soff4_voff1:
+; GFX940-GISEL:       ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
+; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    s_endpgm
 bb:
   %soff4 = mul i32 %soff, 4
   %a = alloca i8, i32 64, align 4, addrspace(5)
@@ -239,23 +374,43 @@ bb:
 }
 
 define amdgpu_kernel void @soff4_voff2(i32 %soff) {
-; GFX940-LABEL: soff4_voff2:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    v_mul_u32_u24_e32 v0, 2, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX940-SDAG-LABEL: soff4_voff2:
+; GFX940-SDAG:       ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT:    v_mul_u32_u24_e32 v0, 2, v0
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    s_endpgm
+;
+; GFX940-GISEL-LABEL: soff4_voff2:
+; GFX940-GISEL:       ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    v_mul_u32_u24_e32 v0, 2, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    s_endpgm
 bb:
   %soff4 = mul i32 %soff, 4
   %a = alloca i8, i32 64, align 4, addrspace(5)
@@ -273,23 +428,43 @@ bb:
 }
 
 define amdgpu_kernel void @soff4_voff4(i32 %soff) {
-; GFX940-LABEL: soff4_voff4:
-; GFX940:       ; %bb.0: ; %bb
-; GFX940-NEXT:    s_load_dword s0, s[0:1], 0x24
-; GFX940-NEXT:    v_mov_b32_e32 v1, 1
-; GFX940-NEXT:    v_mul_u32_u24_e32 v0, 4, v0
-; GFX940-NEXT:    v_mov_b32_e32 v2, 2
-; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX940-NEXT:    s_add_i32 s0, s0, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v1, 4
-; GFX940-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
-; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_endpgm
+; GFX940-SDAG-LABEL: soff4_voff4:
+; GFX940-SDAG:       ; %bb.0: ; %bb
+; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
+; GFX940-SDAG-NEXT:    v_mul_u32_u24_e32 v0, 4, v0
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
+; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-SDAG-NEXT:    s_endpgm
+;
+; GFX940-GISEL-LABEL: soff4_voff4:
+; GFX940-GISEL:       ; %bb.0: ; %bb
+; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    v_mul_u32_u24_e32 v0, 4, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
+; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
+; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
+; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-GISEL-NEXT:    s_endpgm
 bb:
   %soff4 = mul i32 %soff, 4
   %a = alloca i8, i32 64, align 4, addrspace(5)


        


More information about the llvm-commits mailing list