[llvm] [AMDGPU] Reorganize tests to unblock #112403 (PR #115503)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 08:01:24 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

<details>
<summary>Changes</summary>

We’re facing an issue (#113782) that is currently blocking #112403. However,
since #112403 involves extensive test changes, I’d prefer to land it as soon as
possible. This PR reorganizes the tests by moving test cases expected to fail
into a separate file. Additionally, it changes the `[15 x i32]` arguments to
`[13 x i32]` to bypass the issue.

---
Full diff: https://github.com/llvm/llvm-project/pull/115503.diff


2 Files Affected:

- (added) llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill.ll (+36) 
- (modified) llvm/test/CodeGen/AMDGPU/call-args-inreg.ll (+39-65) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill.ll
new file mode 100644
index 00000000000000..47c53fcae6a0bf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
+
+; XFAIL: *
+
+declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0
+declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0
+declare hidden void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) #0
+declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg, i32 inreg) #1
+
+define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #0 {
+  call void @external_void_func_a15i32_inreg([15 x i32] inreg %arg0)
+  ret void
+}
+
+define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) #0 {
+  call void @external_void_func_a16i32_inreg([16 x i32] inreg %arg0)
+  ret void
+}
+
+define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #0 {
+  call void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1)
+  ret void
+}
+
+
+define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #1 {
+  call void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg %arg0, i32 inreg %arg1)
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
index 8766303d7ee6ec..d35b5fe818bef8 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
@@ -28,10 +28,8 @@ declare hidden void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg)
 
 declare hidden void @external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg, i32 inreg, i64 inreg) #0
 
-declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0
-declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0
-declare hidden void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) #0
-declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg, i32 inreg) #1
+declare hidden void @external_void_func_a15i32_inreg([13 x i32] inreg) #0
+declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([13 x i32] inreg, i32 inreg) #1
 
 define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 {
 ; GFX9-LABEL: test_call_external_void_func_i8_inreg:
@@ -534,12 +532,6 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
   ret void
 }
 
-; FIXME:
-; define void @test_call_external_void_func_v16i32_inreg(<16 x i32> inreg %arg) #0 {
-;   call void @external_void_func_v16i32_inreg(<16 x i32> inreg %arg)
-;   ret void
-; }
-
 define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
 ; GFX9-LABEL: test_call_external_void_func_f16_inreg:
 ; GFX9:       ; %bb.0:
@@ -1402,16 +1394,16 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre
   ret void
 }
 
-define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #0 {
+define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #0 {
 ; GFX9-LABEL: test_call_external_void_func_a15i32_inreg:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s29, s33
+; GFX9-NEXT:    s_mov_b32 s27, s33
 ; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 vcc, -1
+; GFX9-NEXT:    s_or_saveexec_b64 s[28:29], -1
 ; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, vcc
-; GFX9-NEXT:    v_writelane_b32 v40, s29, 2
+; GFX9-NEXT:    s_mov_b64 exec, s[28:29]
+; GFX9-NEXT:    v_writelane_b32 v40, s27, 2
 ; GFX9-NEXT:    s_addk_i32 s32, 0x400
 ; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT:    s_mov_b32 s3, s17
@@ -1427,13 +1419,11 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #
 ; GFX9-NEXT:    s_mov_b32 s22, s24
 ; GFX9-NEXT:    s_mov_b32 s23, s25
 ; GFX9-NEXT:    s_mov_b32 s24, s26
-; GFX9-NEXT:    s_mov_b32 s25, s27
-; GFX9-NEXT:    s_mov_b32 s26, s28
 ; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_getpc_b64 vcc
-; GFX9-NEXT:    s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12
-; GFX9-NEXT:    s_swappc_b64 s[30:31], vcc
+; GFX9-NEXT:    s_getpc_b64 s[28:29]
+; GFX9-NEXT:    s_add_u32 s28, s28, external_void_func_a15i32_inreg@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s29, s29, external_void_func_a15i32_inreg@rel32@hi+12
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[28:29]
 ; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
@@ -1448,19 +1438,17 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #
 ; GFX11-LABEL: test_call_external_void_func_a15i32_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s25, s33
+; GFX11-NEXT:    s_mov_b32 s23, s33
 ; GFX11-NEXT:    s_mov_b32 s33, s32
-; GFX11-NEXT:    s_or_saveexec_b32 s26, -1
+; GFX11-NEXT:    s_or_saveexec_b32 s24, -1
 ; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GFX11-NEXT:    s_mov_b32 exec_lo, s26
-; GFX11-NEXT:    v_writelane_b32 v40, s25, 2
-; GFX11-NEXT:    s_mov_b32 s26, s24
-; GFX11-NEXT:    s_mov_b32 s25, s23
+; GFX11-NEXT:    s_mov_b32 exec_lo, s24
+; GFX11-NEXT:    v_writelane_b32 v40, s23, 2
 ; GFX11-NEXT:    s_mov_b32 s24, s22
 ; GFX11-NEXT:    s_mov_b32 s23, s21
-; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
 ; GFX11-NEXT:    s_mov_b32 s22, s20
 ; GFX11-NEXT:    s_mov_b32 s21, s19
+; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
 ; GFX11-NEXT:    s_mov_b32 s20, s18
 ; GFX11-NEXT:    s_mov_b32 s19, s17
 ; GFX11-NEXT:    s_mov_b32 s18, s16
@@ -1468,11 +1456,11 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #
 ; GFX11-NEXT:    s_mov_b32 s16, s6
 ; GFX11-NEXT:    s_add_i32 s32, s32, 16
 ; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX11-NEXT:    s_getpc_b64 s[28:29]
-; GFX11-NEXT:    s_add_u32 s28, s28, external_void_func_a15i32_inreg@rel32@lo+4
-; GFX11-NEXT:    s_addc_u32 s29, s29, external_void_func_a15i32_inreg@rel32@hi+12
+; GFX11-NEXT:    s_getpc_b64 s[26:27]
+; GFX11-NEXT:    s_add_u32 s26, s26, external_void_func_a15i32_inreg@rel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s27, s27, external_void_func_a15i32_inreg@rel32@hi+12
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_swappc_b64 s[30:31], s[28:29]
+; GFX11-NEXT:    s_swappc_b64 s[30:31], s[26:27]
 ; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
 ; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
 ; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
@@ -1483,34 +1471,22 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #
 ; GFX11-NEXT:    s_mov_b32 s33, s0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-  call void @external_void_func_a15i32_inreg([15 x i32] inreg %arg0)
+  call void @external_void_func_a15i32_inreg([13 x i32] inreg %arg0)
   ret void
 }
 
-; FIXME:
-; define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) #0 {
-;   call void @external_void_func_a16i32_inreg([16 x i32] inreg %arg0)
-;   ret void
-; }
-
-; FIXME:
-; define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #0 {
-;   call void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1)
-;   ret void
-; }
-
 
 ; FIXME: This should also fail
-define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #1 {
+define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inreg %arg0, i32 inreg %arg1) #1 {
 ; GFX9-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s23, s33
+; GFX9-NEXT:    s_mov_b32 s21, s33
 ; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[24:25], -1
+; GFX9-NEXT:    s_or_saveexec_b64 s[22:23], -1
 ; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[24:25]
-; GFX9-NEXT:    v_writelane_b32 v40, s23, 2
+; GFX9-NEXT:    s_mov_b64 exec, s[22:23]
+; GFX9-NEXT:    v_writelane_b32 v40, s21, 2
 ; GFX9-NEXT:    s_addk_i32 s32, 0x400
 ; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT:    s_mov_b32 s3, s7
@@ -1527,13 +1503,11 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre
 ; GFX9-NEXT:    s_mov_b32 s11, s18
 ; GFX9-NEXT:    s_mov_b32 s15, s19
 ; GFX9-NEXT:    s_mov_b32 s16, s20
-; GFX9-NEXT:    s_mov_b32 s17, s21
-; GFX9-NEXT:    s_mov_b32 s18, s22
 ; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_getpc_b64 s[24:25]
-; GFX9-NEXT:    s_add_u32 s24, s24, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s25, s25, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[24:25]
+; GFX9-NEXT:    s_getpc_b64 s[22:23]
+; GFX9-NEXT:    s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[22:23]
 ; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
@@ -1548,19 +1522,19 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre
 ; GFX11-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s19, s33
+; GFX11-NEXT:    s_mov_b32 s17, s33
 ; GFX11-NEXT:    s_mov_b32 s33, s32
-; GFX11-NEXT:    s_or_saveexec_b32 s20, -1
+; GFX11-NEXT:    s_or_saveexec_b32 s18, -1
 ; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GFX11-NEXT:    s_mov_b32 exec_lo, s20
-; GFX11-NEXT:    v_writelane_b32 v40, s19, 2
+; GFX11-NEXT:    s_mov_b32 exec_lo, s18
+; GFX11-NEXT:    v_writelane_b32 v40, s17, 2
 ; GFX11-NEXT:    s_add_i32 s32, s32, 16
-; GFX11-NEXT:    s_getpc_b64 s[20:21]
-; GFX11-NEXT:    s_add_u32 s20, s20, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
-; GFX11-NEXT:    s_addc_u32 s21, s21, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
+; GFX11-NEXT:    s_getpc_b64 s[18:19]
+; GFX11-NEXT:    s_add_u32 s18, s18, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s19, s19, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
 ; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
 ; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX11-NEXT:    s_swappc_b64 s[30:31], s[20:21]
+; GFX11-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
 ; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
@@ -1572,7 +1546,7 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre
 ; GFX11-NEXT:    s_mov_b32 s33, s0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-  call void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg %arg0, i32 inreg %arg1)
+  call void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([13 x i32] inreg %arg0, i32 inreg %arg1)
   ret void
 }
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/115503


More information about the llvm-commits mailing list