[llvm] [AMDGPU] Enhance error handling for mismatched calling conventions (PR #137825)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 29 08:17:39 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Manuel Carrasco (mgcarrasco)
<details>
<summary>Changes</summary>
Introduce early failure when mismatched calling conventions are detected to prevent errors from propagating further in the AMDGPU backend.
---
Full diff: https://github.com/llvm/llvm-project/pull/137825.diff
10 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp (+10-1)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+30-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/calling-conventions.ll (+2-2)
- (added) llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll (+18)
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll (+2-62)
- (modified) llvm/test/CodeGen/AMDGPU/required-export-priority.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/sibling-call.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index a15f193549936..e2e386d204d55 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1453,13 +1453,22 @@ bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder,
bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
- if (Function *F = Info.CB->getCalledFunction())
+ if (Function *F = Info.CB->getCalledFunction()) {
if (F->isIntrinsic()) {
assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
"Unexpected intrinsic");
return lowerChainCall(MIRBuilder, Info);
}
+ // Detect calling convention mismatches (which are undefined behavior)
+ // early, so they do not surface later as harder-to-debug errors.
+ if (F->getCallingConv() != Info.CallConv) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call: calling convention mismatch "
+ "(undefined behavior)\n");
+ return false;
+ }
+ }
+
if (Info.IsVarArg) {
LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c05ba42d999e9..481fd7c4e9dac 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3696,11 +3696,39 @@ enum ChainCallArgIdx {
// The wave scratch offset register is used as the global base pointer.
SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
CallingConv::ID CallConv = CLI.CallConv;
- bool IsChainCallConv = AMDGPU::isChainCC(CallConv);
- SelectionDAG &DAG = CLI.DAG;
+ // Detect calling convention mismatches (which are undefined behavior)
+ // early, so they do not surface later as harder-to-debug errors.
+ if (Function *CalledFn = CLI.CB->getCalledFunction()) {
+ if (CalledFn->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) {
+ // This intrinsic handles calls to functions with specific calling
+ // conventions. These functions might have two valid calling conventions
+ // at a single callsite, requiring special handling.
+ if (Value *FnArg = CLI.CB->getArgOperand(0)) {
+ if (Function *WrappedFn = dyn_cast<Function>(FnArg)) {
+ switch (WrappedFn->getCallingConv()) {
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ break;
+ default:
+ return lowerUnhandledCall(
+ CLI, InVals,
+ "calling convention mismatch (undefined behavior) ");
+ }
+ }
+ }
+ } else {
+ CallingConv::ID CalledFnCC = CalledFn->getCallingConv();
+ if (CalledFnCC != CallConv) {
+ return lowerUnhandledCall(
+ CLI, InVals, "calling convention mismatch (undefined behavior) ");
+ }
+ }
+ }
+ bool IsChainCallConv = AMDGPU::isChainCC(CallConv);
const SDLoc &DL = CLI.DL;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 0b6fe90b90654..96ee15f2eb78b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -60,11 +60,11 @@ declare [5 x i8] @external_a5i8_func_void() #0
declare hidden i32 @external_i32_func_i32(i32) #0
; amdgpu_gfx calling convention
-declare i1 @external_gfx_i1_func_void() #0
-declare i8 @external_gfx_i8_func_void() #0
-declare i32 @external_gfx_i32_func_void() #0
-declare { i32, i64 } @external_gfx_i32_i64_func_void() #0
-declare hidden i32 @external_gfx_i32_func_i32(i32) #0
+declare amdgpu_gfx i1 @external_gfx_i1_func_void() #0
+declare amdgpu_gfx i8 @external_gfx_i8_func_void() #0
+declare amdgpu_gfx i32 @external_gfx_i32_func_void() #0
+declare amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() #0
+declare hidden amdgpu_gfx i32 @external_gfx_i32_func_i32(i32) #0
define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index c18c96d9c50c7..ca580d8f29c84 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -802,7 +802,7 @@ entry:
ret i32 %ret
}
-declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
+declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: name: sibling_call_fastcc_multi_byval
@@ -969,7 +969,7 @@ entry:
ret void
}
-declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
+declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
; Callee has a byval and non-byval stack passed argument
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 0c335e45c9e2f..bc8e21e03251d 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -209,7 +209,7 @@ define amdgpu_kernel void @call_coldcc() #0 {
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
- %val = call float @coldcc(float 1.0)
+ %val = call coldcc float @coldcc(float 1.0)
store float %val, ptr addrspace(1) poison
ret void
}
@@ -303,7 +303,7 @@ define amdgpu_kernel void @call_fastcc() #0 {
; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
- %val = call float @fastcc(float 1.0)
+ %val = call fastcc float @fastcc(float 1.0)
store float %val, ptr addrspace(1) poison
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll b/llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll
new file mode 100644
index 0000000000000..a8442d9090857
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/cc-mismatch-ub.ll
@@ -0,0 +1,18 @@
+; RUN: not llc -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ISEL %s
+; CHECK-NO-ISEL: error: {{.*}} in function main {{.*}} calling convention mismatch (undefined behavior) foo
+
+; RUN: not --crash llc -debug -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CHECK-ISEL %s
+; CHECK-ISEL: Failed to lower call: calling convention mismatch (undefined behavior)
+
+; COM: This test aims to identify invalid method calls.
+; COM: By doing so, it simplifies debugging by exposing issues earlier in the pipeline.
+
+define amdgpu_ps i32 @foo(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) {
+ ret i32 0
+}
+
+define amdgpu_ps i32 @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1) {
+main_body:
+ %C = call i32 @foo(ptr addrspace(4) null, ptr addrspace(4) %arg)
+ ret i32 %C
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
index 4afc2fc972a28..13fff0215804d 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
@@ -777,117 +777,57 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1)
ret void
}
-define hidden void @void_func_void_clobber_s33() #1 {
+define hidden amdgpu_gfx void @void_func_void_clobber_s33() #1 {
; GFX9-LABEL: void_func_void_clobber_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v0, s33, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: v_readlane_b32 s33, v0, 0
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v0, s33, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
-; GFX10-NEXT: v_readlane_b32 s33, v0, 0
-; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: v_writelane_b32 v0, s33, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_readlane_b32 s33, v0, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s33}"() #0
ret void
}
-define hidden void @void_func_void_clobber_s34() #1 {
+define hidden amdgpu_gfx void @void_func_void_clobber_s34() #1 {
; GFX9-LABEL: void_func_void_clobber_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v0, s34, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: v_readlane_b32 s34, v0, 0
-; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v0, s34, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
-; GFX10-NEXT: v_readlane_b32 s34, v0, 0
-; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: v_writelane_b32 v0, s34, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_readlane_b32 s34, v0, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s34}"() #0
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index f14cd4488ef1e..4aa1ddee2efe3 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -310,7 +310,7 @@ define amdgpu_ps void @test_export_in_callee(float %v) #0 {
; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GCN-NEXT: s_endpgm
%x = fadd float %v, 1.0
- call void @test_export_gfx(float %x)
+ call amdgpu_gfx void @test_export_gfx(float %x)
ret void
}
@@ -330,7 +330,7 @@ define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
; GCN-NEXT: s_endpgm
%x = fadd float %v, 1.0
call void @llvm.amdgcn.s.setprio(i16 0)
- call void @test_export_gfx(float %x)
+ call amdgpu_gfx void @test_export_gfx(float %x)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 0221bb0cf4f35..27e24828a8630 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -873,7 +873,7 @@ entry:
ret i32 %ret
}
-declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
+declare hidden fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: sibling_call_fastcc_multi_byval:
@@ -908,7 +908,7 @@ entry:
ret void
}
-declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
+declare hidden fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
; Callee has a byval and non-byval stack passed argument
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
index c779f1d548ea0..16e0a34376b29 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
@@ -4,7 +4,7 @@
target triple = "amdgcn-amd-amdhsa"
; Unknown functions are conservatively passed all implicit parameters
-declare void @unknown_call()
+declare fastcc void @unknown_call()
; Use the same constant as a sgpr parameter (for the kernel id) and for a vector operation
define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id !0 {
; CHECK-LABEL: kern:
``````````
</details>
https://github.com/llvm/llvm-project/pull/137825
More information about the llvm-commits mailing list