[llvm] defce20 - [AMDGPU] Add a test for flat scratch SVS addressing
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 14 01:44:37 PDT 2022
Author: Jay Foad
Date: 2022-04-14T09:39:16+01:00
New Revision: defce20cbb774ebc818a15445bc21a38739afad6
URL: https://github.com/llvm/llvm-project/commit/defce20cbb774ebc818a15445bc21a38739afad6
DIFF: https://github.com/llvm/llvm-project/commit/defce20cbb774ebc818a15445bc21a38739afad6.diff
LOG: [AMDGPU] Add a test for flat scratch SVS addressing
Added:
llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
new file mode 100644
index 0000000000000..22bfd0b12096e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -0,0 +1,307 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefix=GFX940
+
+; Test flat scratch SVS addressing mode with various combinations of alignment
+; of soffset, voffset and inst_offset.
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+
+define amdgpu_kernel void @soff1_voff1(i32 %soff) {  ; Case: kernel-argument offset scaled by 1, workitem-id offset scaled by 1; byte stores at constant offsets 1, 2 and 4.
+; GFX940-LABEL: soff1_voff1:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: v_mov_b32_e32 v2, 2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+bb:
+ %soff1 = mul i32 %soff, 1  ; multiply by 1 mirrors the x2/x4 variants in this file; the expected output has no scaling instruction for it
+ %a = alloca i8, i32 64, align 4, addrspace(5)  ; 64-byte, 4-byte-aligned buffer in addrspace(5)
+ %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1  ; buffer + argument-derived offset
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()  ; workitem id x is the second variable offset
+ %voff1 = mul i32 %voff, 1  ; scaled by 1; v0 is used unmodified in the expected output
+ %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1  ; buffer + argument offset + workitem offset
+ %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1  ; constant offset 1 (expected as "offset:1")
+ store volatile i8 1, i8 addrspace(5)* %p1  ; each volatile store is expected as its own scratch_store_byte
+ %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2  ; constant offset 2 (expected as "offset:2")
+ store volatile i8 2, i8 addrspace(5)* %p2
+ %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4  ; constant offset 4 (expected as "offset:4")
+ store volatile i8 4, i8 addrspace(5)* %p4
+ ret void
+}
+
+define amdgpu_kernel void @soff1_voff2(i32 %soff) {  ; Case: kernel-argument offset scaled by 1, workitem-id offset scaled by 2; byte stores at constant offsets 1, 2 and 4.
+; GFX940-LABEL: soff1_voff2:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: v_mul_u32_u24_e32 v0, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+bb:
+ %soff1 = mul i32 %soff, 1  ; multiply by 1 mirrors the x2/x4 variants in this file; the expected output has no scaling instruction for it
+ %a = alloca i8, i32 64, align 4, addrspace(5)  ; 64-byte, 4-byte-aligned buffer in addrspace(5)
+ %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1  ; buffer + argument-derived offset
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()  ; workitem id x is the second variable offset
+ %voff2 = mul i32 %voff, 2  ; scaled by 2; expected as v_mul_u32_u24 on v0
+ %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2  ; buffer + argument offset + workitem offset
+ %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1  ; constant offset 1 (expected as "offset:1")
+ store volatile i8 1, i8 addrspace(5)* %p1  ; each volatile store is expected as its own scratch_store_byte
+ %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2  ; constant offset 2 (expected as "offset:2")
+ store volatile i8 2, i8 addrspace(5)* %p2
+ %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4  ; constant offset 4 (expected as "offset:4")
+ store volatile i8 4, i8 addrspace(5)* %p4
+ ret void
+}
+
+define amdgpu_kernel void @soff1_voff4(i32 %soff) {  ; Case: kernel-argument offset scaled by 1, workitem-id offset scaled by 4; byte stores at constant offsets 1, 2 and 4.
+; GFX940-LABEL: soff1_voff4:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: v_mul_u32_u24_e32 v0, 4, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+bb:
+ %soff1 = mul i32 %soff, 1  ; multiply by 1 mirrors the x2/x4 variants in this file; the expected output has no scaling instruction for it
+ %a = alloca i8, i32 64, align 4, addrspace(5)  ; 64-byte, 4-byte-aligned buffer in addrspace(5)
+ %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1  ; buffer + argument-derived offset
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()  ; workitem id x is the second variable offset
+ %voff4 = mul i32 %voff, 4  ; scaled by 4; expected as v_mul_u32_u24 on v0
+ %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4  ; buffer + argument offset + workitem offset
+ %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1  ; constant offset 1 (expected as "offset:1")
+ store volatile i8 1, i8 addrspace(5)* %p1  ; each volatile store is expected as its own scratch_store_byte
+ %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2  ; constant offset 2 (expected as "offset:2")
+ store volatile i8 2, i8 addrspace(5)* %p2
+ %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4  ; constant offset 4 (expected as "offset:4")
+ store volatile i8 4, i8 addrspace(5)* %p4
+ ret void
+}
+
+define amdgpu_kernel void @soff2_voff1(i32 %soff) {  ; Case: kernel-argument offset scaled by 2, workitem-id offset scaled by 1; byte stores at constant offsets 1, 2 and 4.
+; GFX940-LABEL: soff2_voff1:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: v_mov_b32_e32 v2, 2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s0, s0, 1
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+bb:
+ %soff2 = mul i32 %soff, 2  ; scaled by 2; expected as s_lshl_b32 by 1 on s0
+ %a = alloca i8, i32 64, align 4, addrspace(5)  ; 64-byte, 4-byte-aligned buffer in addrspace(5)
+ %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2  ; buffer + argument-derived offset
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()  ; workitem id x is the second variable offset
+ %voff1 = mul i32 %voff, 1  ; scaled by 1; v0 is used unmodified in the expected output
+ %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1  ; buffer + argument offset + workitem offset
+ %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1  ; constant offset 1 (expected as "offset:1")
+ store volatile i8 1, i8 addrspace(5)* %p1  ; each volatile store is expected as its own scratch_store_byte
+ %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2  ; constant offset 2 (expected as "offset:2")
+ store volatile i8 2, i8 addrspace(5)* %p2
+ %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4  ; constant offset 4 (expected as "offset:4")
+ store volatile i8 4, i8 addrspace(5)* %p4
+ ret void
+}
+
+define amdgpu_kernel void @soff2_voff2(i32 %soff) {  ; Case: kernel-argument offset scaled by 2, workitem-id offset scaled by 2; byte stores at constant offsets 1, 2 and 4.
+; GFX940-LABEL: soff2_voff2:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: v_mul_u32_u24_e32 v0, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s0, s0, 1
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+bb:
+ %soff2 = mul i32 %soff, 2  ; scaled by 2; expected as s_lshl_b32 by 1 on s0
+ %a = alloca i8, i32 64, align 4, addrspace(5)  ; 64-byte, 4-byte-aligned buffer in addrspace(5)
+ %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2  ; buffer + argument-derived offset
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()  ; workitem id x is the second variable offset
+ %voff2 = mul i32 %voff, 2  ; scaled by 2; expected as v_mul_u32_u24 on v0
+ %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2  ; buffer + argument offset + workitem offset
+ %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1  ; constant offset 1 (expected as "offset:1")
+ store volatile i8 1, i8 addrspace(5)* %p1  ; each volatile store is expected as its own scratch_store_byte
+ %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2  ; constant offset 2 (expected as "offset:2")
+ store volatile i8 2, i8 addrspace(5)* %p2
+ %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4  ; constant offset 4 (expected as "offset:4")
+ store volatile i8 4, i8 addrspace(5)* %p4
+ ret void
+}
+
+define amdgpu_kernel void @soff2_voff4(i32 %soff) {  ; Case: kernel-argument offset scaled by 2, workitem-id offset scaled by 4; byte stores at constant offsets 1, 2 and 4.
+; GFX940-LABEL: soff2_voff4:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: v_mul_u32_u24_e32 v0, 4, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s0, s0, 1
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+bb:
+ %soff2 = mul i32 %soff, 2  ; scaled by 2; expected as s_lshl_b32 by 1 on s0
+ %a = alloca i8, i32 64, align 4, addrspace(5)  ; 64-byte, 4-byte-aligned buffer in addrspace(5)
+ %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2  ; buffer + argument-derived offset
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()  ; workitem id x is the second variable offset
+ %voff4 = mul i32 %voff, 4  ; scaled by 4; expected as v_mul_u32_u24 on v0
+ %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4  ; buffer + argument offset + workitem offset
+ %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1  ; constant offset 1 (expected as "offset:1")
+ store volatile i8 1, i8 addrspace(5)* %p1  ; each volatile store is expected as its own scratch_store_byte
+ %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2  ; constant offset 2 (expected as "offset:2")
+ store volatile i8 2, i8 addrspace(5)* %p2
+ %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4  ; constant offset 4 (expected as "offset:4")
+ store volatile i8 4, i8 addrspace(5)* %p4
+ ret void
+}
+
+define amdgpu_kernel void @soff4_voff1(i32 %soff) {  ; Case: kernel-argument offset scaled by 4, workitem-id offset scaled by 1; byte stores at constant offsets 1, 2 and 4.
+; GFX940-LABEL: soff4_voff1:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: v_mov_b32_e32 v2, 2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+bb:
+ %soff4 = mul i32 %soff, 4  ; scaled by 4; expected as s_lshl_b32 by 2 on s0
+ %a = alloca i8, i32 64, align 4, addrspace(5)  ; 64-byte, 4-byte-aligned buffer in addrspace(5)
+ %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4  ; buffer + argument-derived offset
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()  ; workitem id x is the second variable offset
+ %voff1 = mul i32 %voff, 1  ; scaled by 1; v0 is used unmodified in the expected output
+ %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1  ; buffer + argument offset + workitem offset
+ %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1  ; constant offset 1 (expected as "offset:1")
+ store volatile i8 1, i8 addrspace(5)* %p1  ; each volatile store is expected as its own scratch_store_byte
+ %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2  ; constant offset 2 (expected as "offset:2")
+ store volatile i8 2, i8 addrspace(5)* %p2
+ %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4  ; constant offset 4 (expected as "offset:4")
+ store volatile i8 4, i8 addrspace(5)* %p4
+ ret void
+}
+
+define amdgpu_kernel void @soff4_voff2(i32 %soff) {  ; Case: kernel-argument offset scaled by 4, workitem-id offset scaled by 2; byte stores at constant offsets 1, 2 and 4.
+; GFX940-LABEL: soff4_voff2:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: v_mul_u32_u24_e32 v0, 2, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+bb:
+ %soff4 = mul i32 %soff, 4  ; scaled by 4; expected as s_lshl_b32 by 2 on s0
+ %a = alloca i8, i32 64, align 4, addrspace(5)  ; 64-byte, 4-byte-aligned buffer in addrspace(5)
+ %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4  ; buffer + argument-derived offset
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()  ; workitem id x is the second variable offset
+ %voff2 = mul i32 %voff, 2  ; scaled by 2; expected as v_mul_u32_u24 on v0
+ %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2  ; buffer + argument offset + workitem offset
+ %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1  ; constant offset 1 (expected as "offset:1")
+ store volatile i8 1, i8 addrspace(5)* %p1  ; each volatile store is expected as its own scratch_store_byte
+ %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2  ; constant offset 2 (expected as "offset:2")
+ store volatile i8 2, i8 addrspace(5)* %p2
+ %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4  ; constant offset 4 (expected as "offset:4")
+ store volatile i8 4, i8 addrspace(5)* %p4
+ ret void
+}
+
+define amdgpu_kernel void @soff4_voff4(i32 %soff) {  ; Case: kernel-argument offset scaled by 4, workitem-id offset scaled by 4; byte stores at constant offsets 1, 2 and 4.
+; GFX940-LABEL: soff4_voff4:
+; GFX940: ; %bb.0: ; %bb
+; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
+; GFX940-NEXT: v_mov_b32_e32 v1, 1
+; GFX940-NEXT: v_mul_u32_u24_e32 v0, 4, v0
+; GFX940-NEXT: v_mov_b32_e32 v2, 2
+; GFX940-NEXT: s_waitcnt lgkmcnt(0)
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: v_mov_b32_e32 v1, 4
+; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
+; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_endpgm
+bb:
+ %soff4 = mul i32 %soff, 4  ; scaled by 4; expected as s_lshl_b32 by 2 on s0
+ %a = alloca i8, i32 64, align 4, addrspace(5)  ; 64-byte, 4-byte-aligned buffer in addrspace(5)
+ %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4  ; buffer + argument-derived offset
+ %voff = call i32 @llvm.amdgcn.workitem.id.x()  ; workitem id x is the second variable offset
+ %voff4 = mul i32 %voff, 4  ; scaled by 4; expected as v_mul_u32_u24 on v0
+ %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4  ; buffer + argument offset + workitem offset
+ %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1  ; constant offset 1 (expected as "offset:1")
+ store volatile i8 1, i8 addrspace(5)* %p1  ; each volatile store is expected as its own scratch_store_byte
+ %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2  ; constant offset 2 (expected as "offset:2")
+ store volatile i8 2, i8 addrspace(5)* %p2
+ %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4  ; constant offset 4 (expected as "offset:4")
+ store volatile i8 4, i8 addrspace(5)* %p4
+ ret void
+}
More information about the llvm-commits
mailing list