[PATCH] D100877: AMDGPU: Fix indirect tail calls
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 20 11:49:46 PDT 2021
arsenm created this revision.
arsenm added reviewers: rampitec, madhur13490.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
Fix a selection error for tail calls to uniform indirect callees, and
fall back to a regular call when the callee is divergent.
https://reviews.llvm.org/D100877
Files:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/sibling-call.ll
Index: llvm/test/CodeGen/AMDGPU/sibling-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -272,5 +272,35 @@
ret i32 %ret
}
+@func_ptr_gv = external unnamed_addr addrspace(4) constant i32(i32, i32)*, align 4
+
+; Do support tail calls with a uniform, but unknown, callee.
+; GCN-LABEL: {{^}}indirect_uniform_sibling_call_i32_fastcc_i32_i32:
+; GCN: s_load_dwordx2 [[GV_ADDR:s\[[0-9]+:[0-9]+\]]]
+; GCN: s_load_dwordx2 [[FUNC_PTR:s\[[0-9]+:[0-9]+\]]], [[GV_ADDR]]
+; GCN: s_setpc_b64 [[FUNC_PTR]]
+define hidden fastcc i32 @indirect_uniform_sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
+entry:
+ %func.ptr.load = load i32(i32, i32)*, i32(i32, i32)* addrspace(4)* @func_ptr_gv
+ %ret = tail call fastcc i32 %func.ptr.load(i32 %a, i32 %b)
+ ret i32 %ret
+}
+
+; We can't support a tail call to a divergent target. Use a waterfall
+; loop around a regular call instead.
+; GCN-LABEL: {{^}}indirect_divergent_sibling_call_i32_fastcc_i32_i32:
+; GCN: v_readfirstlane_b32
+; GCN: v_readfirstlane_b32
+; GCN: s_and_saveexec_b64
+; GCN: s_swappc_b64
+; GCN: s_cbranch_execnz
+; GCN: s_setpc_b64
+define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(i32(i32, i32)* %func.ptr, i32 %a, i32 %b, i32 %c) #1 {
+entry:
+ %add = add i32 %b, %c
+ %ret = tail call fastcc i32 %func.ptr(i32 %a, i32 %add)
+ ret i32 %ret
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -492,6 +492,11 @@
let isConvergent = 1;
}
+// Handle selecting indirect tail calls
+def : GCNPat<
+ (AMDGPUtc_return i64:$src0, (i64 0), (i32 timm:$fpdiff)),
+ (SI_TCRETURN SReg_64:$src0, (i64 0), i32imm:$fpdiff)
+>;
def ADJCALLSTACKUP : SPseudoInstSI<
(outs), (ins i32imm:$amt0, i32imm:$amt1),
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2851,6 +2851,11 @@
if (!mayTailCallThisCC(CalleeCC))
return false;
+ // For a divergent call target, we need to do a waterfall loop over the
+ // possible callees which precludes us from using a simple jump.
+ if (Callee->isDivergent())
+ return false;
+
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D100877.338946.patch
Type: text/x-patch
Size: 2691 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210420/83f9d1e4/attachment.bin>
More information about the llvm-commits
mailing list