[llvm] AMDGPU: Add baseline test for gws handling with AGPR inputs (PR #169372)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 24 09:25:13 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/169372
None
>From 9a17612fd82daca177cd37f1fd139c638d549a88 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 21 Nov 2025 16:20:36 -0500
Subject: [PATCH] AMDGPU: Add baseline test for gws handling with AGPR inputs
---
llvm/test/CodeGen/AMDGPU/gws_agpr.ll | 395 +++++++++++++++++++++++++++
1 file changed, 395 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/gws_agpr.ll
diff --git a/llvm/test/CodeGen/AMDGPU/gws_agpr.ll b/llvm/test/CodeGen/AMDGPU/gws_agpr.ll
new file mode 100644
index 0000000000000..2082a519d4f83
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gws_agpr.ll
@@ -0,0 +1,395 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=CHECK,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+
+define void @gws_init_offset0() #0 {
+; SDAG-LABEL: gws_init_offset0:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a0
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: s_mov_b32 m0, 0
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_init a0 gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_init_offset0:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
+; GISEL-NEXT: s_mov_b32 m0, 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_init v0 gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; data operand defined by inline asm with the "=a" constraint (a0 in the expected output)
+ call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 0) ; constant offset 0, expected m0 is 0; the -global-isel=0 run uses a0 directly, the -global-isel=1 run copies it to v0 first
+ ret void
+}
+
+define void @gws_init_offset63() #0 {
+; SDAG-LABEL: gws_init_offset63:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a0
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: s_mov_b32 m0, 0
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_init a0 offset:63 gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_init_offset63:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
+; GISEL-NEXT: s_mov_b32 m0, 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_init v0 offset:63 gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; data operand defined by inline asm with the "=a" constraint (a0 in the expected output)
+ call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 63) ; constant offset 63 appears as the instruction's offset field in the expected output while m0 stays 0
+ ret void
+}
+
+define void @gws_init_sgpr_offset(i32 inreg %offset) #0 {
+; SDAG-LABEL: gws_init_sgpr_offset:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a0
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: s_lshl_b32 m0, s16, 16
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_init a0 gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_init_sgpr_offset:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
+; GISEL-NEXT: s_lshl_b32 m0, s16, 16
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_init v0 gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; data operand defined by inline asm with the "=a" constraint (a0 in the expected output)
+ call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) ; variable offset from the inreg argument; the expected output shifts s16 left by 16 into m0
+ ret void
+}
+
+define amdgpu_kernel void @gws_init_agpr_offset() #0 {
+; SDAG-LABEL: gws_init_agpr_offset:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a1
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: v_accvgpr_read_b32 v0, a1
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a0
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: s_lshl_b32 m0, s0, 16
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_init a0 gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_endpgm
+;
+; GISEL-LABEL: gws_init_agpr_offset:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a1
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v0, a1
+; GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v2, a0
+; GISEL-NEXT: s_lshl_b32 m0, s0, 16
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_init v2 gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_endpgm
+ %val = call i32 asm "; def $0", "=a"() ; data operand defined with the "=a" constraint (a0 in the expected output)
+ %offset = call i32 asm "; def $0", "=a"() ; offset is also defined with the "=a" constraint (a1 in the expected output)
+ call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) ; expected output moves a1 through v0 and v_readfirstlane_b32 into s0, then shifts it left by 16 into m0
+ ret void
+}
+
+define void @gws_init_agpr_offset_add1() #0 {
+; SDAG-LABEL: gws_init_agpr_offset_add1:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a1
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: v_accvgpr_read_b32 v0, a1
+; SDAG-NEXT: v_readfirstlane_b32 s4, v0
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a0
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: s_lshl_b32 m0, s4, 16
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_init a0 offset:1 gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_init_agpr_offset_add1:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a1
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v0, a1
+; GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v2, a0
+; GISEL-NEXT: s_lshl_b32 m0, s4, 16
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_init v2 offset:1 gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; data operand defined with the "=a" constraint (a0 in the expected output)
+ %offset.base = call i32 asm "; def $0", "=a"() ; variable part of the offset, also defined with the "=a" constraint (a1 in the expected output)
+ %offset = add i32 %offset.base, 1 ; the constant 1 ends up in the instruction's offset field in the expected output
+ call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset) ; expected output routes the base through v_readfirstlane_b32 into s4 and shifts it left by 16 into m0
+ ret void
+}
+
+define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {
+; CHECK-LABEL: gws_init_vgpr_offset_add:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def a0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_accvgpr_read_b32 v0, a0
+; CHECK-NEXT: v_readfirstlane_b32 s1, v0
+; CHECK-NEXT: s_lshl_b32 m0, s1, 16
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: ds_gws_init v0 offset:3 gds
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_endpgm
+ %agpr.offset.base = call i32 asm "; def $0", "=a"() ; NOTE(review): the function name says "vgpr" but the offset base is defined with the "=a" constraint (a0 in the expected output)
+ %agpr.offset = add i32 %agpr.offset.base, 3 ; the constant 3 ends up in the instruction's offset field in the expected output
+ call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %agpr.offset) ; here the data operand is the kernel argument: loaded with s_load_dword and moved to v0 in the expected output
+ ret void
+}
+
+define void @gws_barrier_offset0() #0 {
+; SDAG-LABEL: gws_barrier_offset0:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a0
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: s_mov_b32 m0, 0
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_barrier a0 gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_barrier_offset0:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
+; GISEL-NEXT: s_mov_b32 m0, 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_barrier v0 gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; data operand defined by inline asm with the "=a" constraint (a0 in the expected output)
+ call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0) ; constant offset 0, expected m0 is 0; the -global-isel=0 run uses a0 directly, the -global-isel=1 run copies it to v0 first
+ ret void
+}
+
+define void @gws_barrier_offset63() #0 {
+; SDAG-LABEL: gws_barrier_offset63:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a0
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: s_mov_b32 m0, 0
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_barrier a0 offset:63 gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_barrier_offset63:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
+; GISEL-NEXT: s_mov_b32 m0, 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_barrier v0 offset:63 gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; data operand defined by inline asm with the "=a" constraint (a0 in the expected output)
+ call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 63) ; constant offset 63 appears as the instruction's offset field in the expected output while m0 stays 0
+ ret void
+}
+
+define void @gws_barrier_sgpr_offset(i32 inreg %offset) #0 {
+; SDAG-LABEL: gws_barrier_sgpr_offset:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a0
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: s_lshl_b32 m0, s16, 16
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_barrier a0 gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_barrier_sgpr_offset:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
+; GISEL-NEXT: s_lshl_b32 m0, s16, 16
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_barrier v0 gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; data operand defined by inline asm with the "=a" constraint (a0 in the expected output)
+ call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %offset) ; variable offset from the inreg argument; the expected output shifts s16 left by 16 into m0
+ ret void
+}
+
+define void @gws_sema_v_offset0() #0 {
+; SDAG-LABEL: gws_sema_v_offset0:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_mov_b32 m0, 0
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_sema_v gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_sema_v_offset0:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_mov_b32 m0, 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_sema_v gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; NOTE(review): %val is never used below; the -global-isel=0 output has no asm block, the -global-isel=1 output still emits it after ds_gws_sema_v
+ call void @llvm.amdgcn.ds.gws.sema.v(i32 0) ; this call passes only the constant offset 0, so no register data operand is involved
+ ret void
+}
+
+define void @gws_sema_br_offset0() #0 {
+; SDAG-LABEL: gws_sema_br_offset0:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: ;;#ASMSTART
+; SDAG-NEXT: ; def a0
+; SDAG-NEXT: ;;#ASMEND
+; SDAG-NEXT: s_mov_b32 m0, 0
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_sema_br a0 gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_sema_br_offset0:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
+; GISEL-NEXT: s_mov_b32 m0, 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_sema_br v0 gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; data operand defined by inline asm with the "=a" constraint (a0 in the expected output)
+ call void @llvm.amdgcn.ds.gws.sema.br(i32 %val, i32 0) ; constant offset 0, expected m0 is 0; the -global-isel=0 run uses a0 directly, the -global-isel=1 run copies it to v0 first
+ ret void
+}
+
+define void @gws_sema_p_offset0() #0 {
+; SDAG-LABEL: gws_sema_p_offset0:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_mov_b32 m0, 0
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_sema_p gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_sema_p_offset0:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_mov_b32 m0, 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_sema_p gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; NOTE(review): %val is never used below; the -global-isel=0 output has no asm block, the -global-isel=1 output still emits it after ds_gws_sema_p
+ call void @llvm.amdgcn.ds.gws.sema.p(i32 0) ; this call passes only the constant offset 0, so no register data operand is involved
+ ret void
+}
+
+define void @gws_sema_release_all_offset0() #0 {
+; SDAG-LABEL: gws_sema_release_all_offset0:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_mov_b32 m0, 0
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: ds_gws_sema_release_all gds
+; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: gws_sema_release_all_offset0:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_mov_b32 m0, 0
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: ds_gws_sema_release_all gds
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: ;;#ASMSTART
+; GISEL-NEXT: ; def a0
+; GISEL-NEXT: ;;#ASMEND
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+ %val = call i32 asm "; def $0", "=a"() ; NOTE(review): %val is never used below; the -global-isel=0 output has no asm block, the -global-isel=1 output still emits it after ds_gws_sema_release_all
+ call void @llvm.amdgcn.ds.gws.sema.release.all(i32 0) ; this call passes only the constant offset 0, so no register data operand is involved
+ ret void
+}
+
+attributes #0 = { nounwind }
More information about the llvm-commits mailing list