[llvm] 6479e9b - [AMDGPU] Fix UB in tests due to mismatched calling conventions (#137957)

via llvm-commits llvm-commits at lists.llvm.org
Tue May 6 09:31:30 PDT 2025


Author: Manuel Carrasco
Date: 2025-05-06T18:31:27+02:00
New Revision: 6479e9bad371f19a36ed15c236defc301efc180f

URL: https://github.com/llvm/llvm-project/commit/6479e9bad371f19a36ed15c236defc301efc180f
DIFF: https://github.com/llvm/llvm-project/commit/6479e9bad371f19a36ed15c236defc301efc180f.diff

LOG: [AMDGPU] Fix UB in tests due to mismatched calling conventions (#137957)

Fixes undefined behavior (UB) in tests caused by calling-convention (CC) mismatches between a function's definition or declaration and its call sites.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
    llvm/test/CodeGen/AMDGPU/calling-conventions.ll
    llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
    llvm/test/CodeGen/AMDGPU/required-export-priority.ll
    llvm/test/CodeGen/AMDGPU/sibling-call.ll
    llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 0b6fe90b90654..96ee15f2eb78b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -60,11 +60,11 @@ declare [5 x i8] @external_a5i8_func_void() #0
 declare hidden i32 @external_i32_func_i32(i32) #0
 
 ; amdgpu_gfx calling convention
-declare i1 @external_gfx_i1_func_void() #0
-declare i8 @external_gfx_i8_func_void() #0
-declare i32 @external_gfx_i32_func_void() #0
-declare { i32, i64 } @external_gfx_i32_i64_func_void() #0
-declare hidden i32 @external_gfx_i32_func_i32(i32) #0
+declare amdgpu_gfx i1 @external_gfx_i1_func_void() #0
+declare amdgpu_gfx i8 @external_gfx_i8_func_void() #0
+declare amdgpu_gfx i32 @external_gfx_i32_func_void() #0
+declare amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() #0
+declare hidden amdgpu_gfx i32 @external_gfx_i32_func_i32(i32) #0
 
 
 define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index c18c96d9c50c7..ca580d8f29c84 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -802,7 +802,7 @@ entry:
   ret i32 %ret
 }
 
-declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
+declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
 
 define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
   ; GCN-LABEL: name: sibling_call_fastcc_multi_byval
@@ -969,7 +969,7 @@ entry:
   ret void
 }
 
-declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
+declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
 
 ; Callee has a byval and non-byval stack passed argument
 define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {

diff  --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 0c335e45c9e2f..bc8e21e03251d 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -209,7 +209,7 @@ define amdgpu_kernel void @call_coldcc() #0 {
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
 ; GFX11-NEXT:    s_endpgm
-  %val = call float @coldcc(float 1.0)
+  %val = call coldcc float @coldcc(float 1.0)
   store float %val, ptr addrspace(1) poison
   ret void
 }
@@ -303,7 +303,7 @@ define amdgpu_kernel void @call_fastcc() #0 {
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
 ; GFX11-NEXT:    s_endpgm
-  %val = call float @fastcc(float 1.0)
+  %val = call fastcc float @fastcc(float 1.0)
   store float %val, ptr addrspace(1) poison
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
index 4afc2fc972a28..13fff0215804d 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
@@ -777,117 +777,57 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1)
   ret void
 }
 
-define hidden void @void_func_void_clobber_s33() #1 {
+define hidden amdgpu_gfx void @void_func_void_clobber_s33() #1 {
 ; GFX9-LABEL: void_func_void_clobber_s33:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-NEXT:    v_writelane_b32 v0, s33, 0
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; clobber
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    v_readlane_b32 s33, v0, 0
-; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: void_func_void_clobber_s33:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX10-NEXT:    s_mov_b32 exec_lo, s4
-; GFX10-NEXT:    v_writelane_b32 v0, s33, 0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; clobber
 ; GFX10-NEXT:    ;;#ASMEND
-; GFX10-NEXT:    v_readlane_b32 s33, v0, 0
-; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX10-NEXT:    s_mov_b32 exec_lo, s4
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: void_func_void_clobber_s33:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
-; GFX11-NEXT:    s_mov_b32 exec_lo, s0
-; GFX11-NEXT:    v_writelane_b32 v0, s33, 0
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; clobber
 ; GFX11-NEXT:    ;;#ASMEND
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_readlane_b32 s33, v0, 0
-; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
-; GFX11-NEXT:    s_mov_b32 exec_lo, s0
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "; clobber", "~{s33}"() #0
   ret void
 }
 
-define hidden void @void_func_void_clobber_s34() #1 {
+define hidden amdgpu_gfx void @void_func_void_clobber_s34() #1 {
 ; GFX9-LABEL: void_func_void_clobber_s34:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-NEXT:    v_writelane_b32 v0, s34, 0
 ; GFX9-NEXT:    ;;#ASMSTART
 ; GFX9-NEXT:    ; clobber
 ; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    v_readlane_b32 s34, v0, 0
-; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: void_func_void_clobber_s34:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX10-NEXT:    s_mov_b32 exec_lo, s4
-; GFX10-NEXT:    v_writelane_b32 v0, s34, 0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ; clobber
 ; GFX10-NEXT:    ;;#ASMEND
-; GFX10-NEXT:    v_readlane_b32 s34, v0, 0
-; GFX10-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX10-NEXT:    s_mov_b32 exec_lo, s4
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: void_func_void_clobber_s34:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
-; GFX11-NEXT:    s_mov_b32 exec_lo, s0
-; GFX11-NEXT:    v_writelane_b32 v0, s34, 0
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ; clobber
 ; GFX11-NEXT:    ;;#ASMEND
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_readlane_b32 s34, v0, 0
-; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
-; GFX11-NEXT:    s_mov_b32 exec_lo, s0
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "; clobber", "~{s34}"() #0
   ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index f14cd4488ef1e..4aa1ddee2efe3 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -310,7 +310,7 @@ define amdgpu_ps void @test_export_in_callee(float %v) #0 {
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GCN-NEXT:    s_endpgm
   %x = fadd float %v, 1.0
-  call void @test_export_gfx(float %x)
+  call amdgpu_gfx void @test_export_gfx(float %x)
   ret void
 }
 
@@ -330,7 +330,7 @@ define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
 ; GCN-NEXT:    s_endpgm
   %x = fadd float %v, 1.0
   call void @llvm.amdgcn.s.setprio(i16 0)
-  call void @test_export_gfx(float %x)
+  call amdgpu_gfx void @test_export_gfx(float %x)
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 382620d9a183a..35234236b848f 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -872,7 +872,7 @@ entry:
   ret i32 %ret
 }
 
-declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
+declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
 
 define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
 ; GCN-LABEL: sibling_call_fastcc_multi_byval:
@@ -907,7 +907,7 @@ entry:
   ret void
 }
 
-declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
+declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
 
 ; Callee has a byval and non-byval stack passed argument
 define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
index c779f1d548ea0..16e0a34376b29 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
@@ -4,7 +4,7 @@
 target triple = "amdgcn-amd-amdhsa"
 
 ; Unknown functions are conservatively passed all implicit parameters
-declare void @unknown_call()
+declare fastcc void @unknown_call()
 ; Use the same constant as a sgpr parameter (for the kernel id) and for a vector operation
 define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id !0 {
 ; CHECK-LABEL: kern:


        


More information about the llvm-commits mailing list