[llvm] [AMDGPU] Reorganize tests to unblock #112403 (PR #115503)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 08:00:51 PST 2024
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/115503
We’re facing an issue (#113782) that is currently blocking #112403. However,
since #112403 involves extensive test changes, I’d prefer to land it as soon as
possible. This PR reorganizes the tests by moving test cases expected to fail
into a separate file. Additionally, it changes the `[15 x i32]` arguments to
`[13 x i32]` to bypass the issue.
From 803a03c504e4f47715203322aecab7ec1f104418 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 8 Nov 2024 10:45:17 -0500
Subject: [PATCH] [AMDGPU] Reorganize tests to unblock #112403
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We’re facing an issue (#113782) that is currently blocking #112403. However,
since #112403 involves extensive test changes, I’d prefer to land it as soon as
possible. This PR reorganizes the tests by moving test cases expected to fail
into a separate file. Additionally, it changes the `[15 x i32]` arguments to
`[13 x i32]` to bypass the issue.
---
.../call-args-inreg-no-sgpr-for-csrspill.ll | 36 ++++++
llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 104 +++++++-----------
2 files changed, 75 insertions(+), 65 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill.ll
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill.ll
new file mode 100644
index 00000000000000..47c53fcae6a0bf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
+; Split out of call-args-inreg.ll: these cases are expected to fail for now.
+; XFAIL: *
+
+declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0
+declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0
+declare hidden void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) #0
+declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg, i32 inreg) #1
+
+define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #0 {
+ call void @external_void_func_a15i32_inreg([15 x i32] inreg %arg0)
+ ret void
+}
+
+define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) #0 {
+ call void @external_void_func_a16i32_inreg([16 x i32] inreg %arg0)
+ ret void
+}
+
+define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #0 {
+ call void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1)
+ ret void
+}
+
+; Same argument list as the test above, but caller and callee use the "amdgpu-no-*" attribute set (#1).
+define void @test_call_external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg %arg0, i32 inreg %arg1) #1 {
+ call void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg %arg0, i32 inreg %arg1)
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
index 8766303d7ee6ec..d35b5fe818bef8 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
@@ -28,10 +28,8 @@ declare hidden void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg)
declare hidden void @external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg, i32 inreg, i64 inreg) #0
-declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0
-declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0
-declare hidden void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) #0
-declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg, i32 inreg) #1
+declare hidden void @external_void_func_a15i32_inreg([13 x i32] inreg) #0
+declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([13 x i32] inreg, i32 inreg) #1
define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_i8_inreg:
@@ -534,12 +532,6 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
ret void
}
-; FIXME:
-; define void @test_call_external_void_func_v16i32_inreg(<16 x i32> inreg %arg) #0 {
-; call void @external_void_func_v16i32_inreg(<16 x i32> inreg %arg)
-; ret void
-; }
-
define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_f16_inreg:
; GFX9: ; %bb.0:
@@ -1402,16 +1394,16 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre
ret void
}
-define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #0 {
+define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #0 {
; GFX9-LABEL: test_call_external_void_func_a15i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s29, s33
+; GFX9-NEXT: s_mov_b32 s27, s33
; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 vcc, -1
+; GFX9-NEXT: s_or_saveexec_b64 s[28:29], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, vcc
-; GFX9-NEXT: v_writelane_b32 v40, s29, 2
+; GFX9-NEXT: s_mov_b64 exec, s[28:29]
+; GFX9-NEXT: v_writelane_b32 v40, s27, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s3, s17
@@ -1427,13 +1419,11 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #
; GFX9-NEXT: s_mov_b32 s22, s24
; GFX9-NEXT: s_mov_b32 s23, s25
; GFX9-NEXT: s_mov_b32 s24, s26
-; GFX9-NEXT: s_mov_b32 s25, s27
-; GFX9-NEXT: s_mov_b32 s26, s28
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 vcc
-; GFX9-NEXT: s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], vcc
+; GFX9-NEXT: s_getpc_b64 s[28:29]
+; GFX9-NEXT: s_add_u32 s28, s28, external_void_func_a15i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s29, s29, external_void_func_a15i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[28:29]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
@@ -1448,19 +1438,17 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #
; GFX11-LABEL: test_call_external_void_func_a15i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s25, s33
+; GFX11-NEXT: s_mov_b32 s23, s33
; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_or_saveexec_b32 s26, -1
+; GFX11-NEXT: s_or_saveexec_b32 s24, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s26
-; GFX11-NEXT: v_writelane_b32 v40, s25, 2
-; GFX11-NEXT: s_mov_b32 s26, s24
-; GFX11-NEXT: s_mov_b32 s25, s23
+; GFX11-NEXT: s_mov_b32 exec_lo, s24
+; GFX11-NEXT: v_writelane_b32 v40, s23, 2
; GFX11-NEXT: s_mov_b32 s24, s22
; GFX11-NEXT: s_mov_b32 s23, s21
-; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: s_mov_b32 s22, s20
; GFX11-NEXT: s_mov_b32 s21, s19
+; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: s_mov_b32 s20, s18
; GFX11-NEXT: s_mov_b32 s19, s17
; GFX11-NEXT: s_mov_b32 s18, s16
@@ -1468,11 +1456,11 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #
; GFX11-NEXT: s_mov_b32 s16, s6
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
-; GFX11-NEXT: s_getpc_b64 s[28:29]
-; GFX11-NEXT: s_add_u32 s28, s28, external_void_func_a15i32_inreg@rel32@lo+4
-; GFX11-NEXT: s_addc_u32 s29, s29, external_void_func_a15i32_inreg@rel32@hi+12
+; GFX11-NEXT: s_getpc_b64 s[26:27]
+; GFX11-NEXT: s_add_u32 s26, s26, external_void_func_a15i32_inreg@rel32@lo+4
+; GFX11-NEXT: s_addc_u32 s27, s27, external_void_func_a15i32_inreg@rel32@hi+12
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_swappc_b64 s[30:31], s[28:29]
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[26:27]
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
@@ -1483,34 +1471,22 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
- call void @external_void_func_a15i32_inreg([15 x i32] inreg %arg0)
+ call void @external_void_func_a15i32_inreg([13 x i32] inreg %arg0)
ret void
}
-; FIXME:
-; define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) #0 {
-; call void @external_void_func_a16i32_inreg([16 x i32] inreg %arg0)
-; ret void
-; }
-
-; FIXME:
-; define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #0 {
-; call void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1)
-; ret void
-; }
-
; FIXME: This should also fail
-define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #1 {
+define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inreg %arg0, i32 inreg %arg1) #1 {
; GFX9-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s23, s33
+; GFX9-NEXT: s_mov_b32 s21, s33
; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[24:25], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[24:25]
-; GFX9-NEXT: v_writelane_b32 v40, s23, 2
+; GFX9-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-NEXT: v_writelane_b32 v40, s21, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s3, s7
@@ -1527,13 +1503,11 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s15, s19
; GFX9-NEXT: s_mov_b32 s16, s20
-; GFX9-NEXT: s_mov_b32 s17, s21
-; GFX9-NEXT: s_mov_b32 s18, s22
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[24:25]
-; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[24:25]
+; GFX9-NEXT: s_getpc_b64 s[22:23]
+; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
@@ -1548,19 +1522,19 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre
; GFX11-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s19, s33
+; GFX11-NEXT: s_mov_b32 s17, s33
; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_or_saveexec_b32 s20, -1
+; GFX11-NEXT: s_or_saveexec_b32 s18, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s20
-; GFX11-NEXT: v_writelane_b32 v40, s19, 2
+; GFX11-NEXT: s_mov_b32 exec_lo, s18
+; GFX11-NEXT: v_writelane_b32 v40, s17, 2
; GFX11-NEXT: s_add_i32 s32, s32, 16
-; GFX11-NEXT: s_getpc_b64 s[20:21]
-; GFX11-NEXT: s_add_u32 s20, s20, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
-; GFX11-NEXT: s_addc_u32 s21, s21, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
+; GFX11-NEXT: s_getpc_b64 s[18:19]
+; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
+; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
-; GFX11-NEXT: s_swappc_b64 s[30:31], s[20:21]
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
@@ -1572,7 +1546,7 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
- call void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg %arg0, i32 inreg %arg1)
+ call void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([13 x i32] inreg %arg0, i32 inreg %arg1)
ret void
}
More information about the llvm-commits
mailing list