[llvm] f63a19b - [SPARC] Add tail call support for 64-bit target
Brad Smith via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 26 20:32:14 PST 2022
Author: Koakuma
Date: 2022-11-26T23:29:05-05:00
New Revision: f63a19baf0a3ffa62cfb48cc42f9ced1cbd39b5c
URL: https://github.com/llvm/llvm-project/commit/f63a19baf0a3ffa62cfb48cc42f9ced1cbd39b5c
DIFF: https://github.com/llvm/llvm-project/commit/f63a19baf0a3ffa62cfb48cc42f9ced1cbd39b5c.diff
LOG: [SPARC] Add tail call support for 64-bit target
Extend SPARC tail call support, first introduced in D51206 (commit 1c235c375492180c2eecb6331f169486019fd2d2),
to also cover 64-bit targets.
Reviewed By: MaskRay
Differential Revision: https://reviews.llvm.org/D138741
Added:
Modified:
llvm/lib/Target/Sparc/SparcISelLowering.cpp
llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
llvm/test/CodeGen/SPARC/tailcall.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 2665a8d5f264..e3e69f3564ca 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -770,7 +770,10 @@ bool SparcTargetLowering::IsEligibleForTailCallOptimization(
return false;
// Do not tail call opt if the stack is used to pass parameters.
- if (CCInfo.getNextStackOffset() != 0)
+ // 64-bit targets have a slightly higher limit since the ABI requires
+ // to allocate some space even when all the parameters fit inside registers.
+ unsigned StackOffsetLimit = Subtarget->is64Bit() ? 48 : 0;
+ if (CCInfo.getNextStackOffset() > StackOffsetLimit)
return false;
// Do not tail call opt if either the callee or caller returns
@@ -1189,20 +1192,21 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SDValue Chain = CLI.Chain;
auto PtrVT = getPointerTy(DAG.getDataLayout());
- // Sparc target does not yet support tail call optimization.
- CLI.IsTailCall = false;
-
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
+ CLI.IsTailCall = CLI.IsTailCall && IsEligibleForTailCallOptimization(
+ CCInfo, CLI, DAG.getMachineFunction());
+
// Get the size of the outgoing arguments stack space requirement.
// The stack offset computed by CC_Sparc64 includes all arguments.
// Called functions expect 6 argument words to exist in the stack frame, used
// or not.
- unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset());
+ unsigned StackReserved = 6 * 8u;
+ unsigned ArgsSize = std::max(StackReserved, CCInfo.getNextStackOffset());
// Keep stack frames 16-byte aligned.
ArgsSize = alignTo(ArgsSize, 16);
@@ -1211,10 +1215,13 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
if (CLI.IsVarArg)
fixupVariableFloatArgs(ArgLocs, CLI.Outs);
+ assert(!CLI.IsTailCall || ArgsSize == StackReserved);
+
// Adjust the stack pointer to make room for the arguments.
// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
// with more than 6 arguments.
- Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
+ if (!CLI.IsTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
// Collect the set of registers to pass to the function and their values.
// This will be emitted as a sequence of CopyToReg nodes glued to the call
@@ -1274,10 +1281,16 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
DAG.getLoad(MVT::i64, DL, Store, HiPtrOff, MachinePointerInfo());
SDValue Lo64 =
DAG.getLoad(MVT::i64, DL, Store, LoPtrOff, MachinePointerInfo());
- RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()),
- Hi64));
- RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()+1),
- Lo64));
+
+ Register HiReg = VA.getLocReg();
+ Register LoReg = VA.getLocReg() + 1;
+ if (!CLI.IsTailCall) {
+ HiReg = toCallerWindow(HiReg);
+ LoReg = toCallerWindow(LoReg);
+ }
+
+ RegsToPass.push_back(std::make_pair(HiReg, Hi64));
+ RegsToPass.push_back(std::make_pair(LoReg, Lo64));
continue;
}
@@ -1298,7 +1311,11 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
++i;
}
}
- RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg));
+
+ Register Reg = VA.getLocReg();
+ if (!CLI.IsTailCall)
+ Reg = toCallerWindow(Reg);
+ RegsToPass.push_back(std::make_pair(Reg, Arg));
continue;
}
@@ -1366,6 +1383,10 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(InGlue);
// Now the call itself.
+ if (CLI.IsTailCall) {
+ DAG.getMachineFunction().getFrameInfo().setHasTailCall();
+ return DAG.getNode(SPISD::TAIL_CALL, DL, MVT::Other, Ops);
+ }
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, Ops);
InGlue = Chain.getValue(1);
diff --git a/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll b/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
index aeafd8bba1da..6e3c47cc0dca 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-11-Call.ll
@@ -37,11 +37,9 @@ declare void @bar(...)
; V8-NEXT: mov %g1, %o7
; V9-LABEL: test_tail_call_with_return
-; V9: save %sp
-; V9: call foo
-; V9-NEXT: nop
-; V9: ret
-; V9-NEXT: restore %g0, %o0, %o0
+; V9: mov %o7, %g1
+; V9-NEXT: call foo
+; V9-NEXT: mov %g1, %o7
define i32 @test_tail_call_with_return() nounwind {
entry:
diff --git a/llvm/test/CodeGen/SPARC/tailcall.ll b/llvm/test/CodeGen/SPARC/tailcall.ll
index 6c95e3d72920..5ca621b27a2f 100644
--- a/llvm/test/CodeGen/SPARC/tailcall.ll
+++ b/llvm/test/CodeGen/SPARC/tailcall.ll
@@ -1,46 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefix=V8
+; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefix=V9
define i32 @simple_leaf(i32 %i) #0 {
-; CHECK-LABEL: simple_leaf:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: mov %o7, %g1
-; CHECK-NEXT: call foo
-; CHECK-NEXT: mov %g1, %o7
+; V8-LABEL: simple_leaf:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: mov %o7, %g1
+; V8-NEXT: call foo
+; V8-NEXT: mov %g1, %o7
+;
+; V9-LABEL: simple_leaf:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: mov %o7, %g1
+; V9-NEXT: call foo
+; V9-NEXT: mov %g1, %o7
entry:
%call = tail call i32 @foo(i32 %i)
ret i32 %call
}
define i32 @simple_standard(i32 %i) #1 {
-; CHECK-LABEL: simple_standard:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: save %sp, -96, %sp
-; CHECK-NEXT: call foo
-; CHECK-NEXT: restore
+; V8-LABEL: simple_standard:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: save %sp, -96, %sp
+; V8-NEXT: call foo
+; V8-NEXT: restore
+;
+; V9-LABEL: simple_standard:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: save %sp, -128, %sp
+; V9-NEXT: call foo
+; V9-NEXT: restore
entry:
%call = tail call i32 @foo(i32 %i)
ret i32 %call
}
define i32 @extra_arg_leaf(i32 %i) #0 {
-; CHECK-LABEL: extra_arg_leaf:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: mov 12, %o1
-; CHECK-NEXT: mov %o7, %g1
-; CHECK-NEXT: call foo2
-; CHECK-NEXT: mov %g1, %o7
+; V8-LABEL: extra_arg_leaf:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: mov 12, %o1
+; V8-NEXT: mov %o7, %g1
+; V8-NEXT: call foo2
+; V8-NEXT: mov %g1, %o7
+;
+; V9-LABEL: extra_arg_leaf:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: mov 12, %o1
+; V9-NEXT: mov %o7, %g1
+; V9-NEXT: call foo2
+; V9-NEXT: mov %g1, %o7
entry:
%call = tail call i32 @foo2(i32 %i, i32 12)
ret i32 %call
}
define i32 @extra_arg_standard(i32 %i) #1 {
-; CHECK-LABEL: extra_arg_standard:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: save %sp, -96, %sp
-; CHECK-NEXT: call foo2
-; CHECK-NEXT: restore %g0, 12, %o1
+; V8-LABEL: extra_arg_standard:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: save %sp, -96, %sp
+; V8-NEXT: call foo2
+; V8-NEXT: restore %g0, 12, %o1
+;
+; V9-LABEL: extra_arg_standard:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: save %sp, -128, %sp
+; V9-NEXT: call foo2
+; V9-NEXT: restore %g0, 12, %o1
entry:
%call = tail call i32 @foo2(i32 %i, i32 12)
ret i32 %call
@@ -49,17 +75,31 @@ entry:
; Perform tail call optimization for external symbol.
define void @caller_extern(i8* %src) optsize #0 {
-; CHECK-LABEL: caller_extern:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: sethi %hi(dest), %o1
-; CHECK-NEXT: add %o1, %lo(dest), %o1
-; CHECK-NEXT: mov 7, %o2
-; CHECK-NEXT: mov %o0, %o3
-; CHECK-NEXT: mov %o1, %o0
-; CHECK-NEXT: mov %o3, %o1
-; CHECK-NEXT: mov %o7, %g1
-; CHECK-NEXT: call memcpy
-; CHECK-NEXT: mov %g1, %o7
+; V8-LABEL: caller_extern:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: sethi %hi(dest), %o1
+; V8-NEXT: add %o1, %lo(dest), %o1
+; V8-NEXT: mov 7, %o2
+; V8-NEXT: mov %o0, %o3
+; V8-NEXT: mov %o1, %o0
+; V8-NEXT: mov %o3, %o1
+; V8-NEXT: mov %o7, %g1
+; V8-NEXT: call memcpy
+; V8-NEXT: mov %g1, %o7
+;
+; V9-LABEL: caller_extern:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: sethi %h44(dest), %o1
+; V9-NEXT: add %o1, %m44(dest), %o1
+; V9-NEXT: sllx %o1, 12, %o1
+; V9-NEXT: add %o1, %l44(dest), %o1
+; V9-NEXT: mov 7, %o2
+; V9-NEXT: mov %o0, %o3
+; V9-NEXT: mov %o1, %o0
+; V9-NEXT: mov %o3, %o1
+; V9-NEXT: mov %o7, %g1
+; V9-NEXT: call memcpy
+; V9-NEXT: mov %g1, %o7
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i32(
i8* getelementptr inbounds ([2 x i8],
@@ -71,24 +111,38 @@ entry:
; Perform tail call optimization for function pointer.
define i32 @func_ptr_test(i32 ()* nocapture %func_ptr) #0 {
-; CHECK-LABEL: func_ptr_test:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: jmp %o0
-; CHECK-NEXT: nop
+; V8-LABEL: func_ptr_test:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: jmp %o0
+; V8-NEXT: nop
+;
+; V9-LABEL: func_ptr_test:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: jmp %o0
+; V9-NEXT: nop
entry:
%call = tail call i32 %func_ptr() #1
ret i32 %call
}
define i32 @func_ptr_test2(i32 (i32, i32, i32)* nocapture %func_ptr,
-; CHECK-LABEL: func_ptr_test2:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: save %sp, -96, %sp
-; CHECK-NEXT: mov 10, %i3
-; CHECK-NEXT: mov %i0, %i4
-; CHECK-NEXT: mov %i1, %i0
-; CHECK-NEXT: jmp %i4
-; CHECK-NEXT: restore %g0, %i3, %o1
+; V8-LABEL: func_ptr_test2:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: save %sp, -96, %sp
+; V8-NEXT: mov 10, %i3
+; V8-NEXT: mov %i0, %i4
+; V8-NEXT: mov %i1, %i0
+; V8-NEXT: jmp %i4
+; V8-NEXT: restore %g0, %i3, %o1
+;
+; V9-LABEL: func_ptr_test2:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: save %sp, -128, %sp
+; V9-NEXT: mov 10, %i3
+; V9-NEXT: mov %i0, %i4
+; V9-NEXT: mov %i1, %i0
+; V9-NEXT: jmp %i4
+; V9-NEXT: restore %g0, %i3, %o1
i32 %r, i32 %q) #1 {
entry:
%call = tail call i32 %func_ptr(i32 %r, i32 10, i32 %q) #1
@@ -99,20 +153,35 @@ entry:
; Do not tail call optimize if stack is used to pass parameters.
define i32 @caller_args() #0 {
-; CHECK-LABEL: caller_args:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: save %sp, -104, %sp
-; CHECK-NEXT: mov 6, %i0
-; CHECK-NEXT: mov %g0, %o0
-; CHECK-NEXT: mov 1, %o1
-; CHECK-NEXT: mov 2, %o2
-; CHECK-NEXT: mov 3, %o3
-; CHECK-NEXT: mov 4, %o4
-; CHECK-NEXT: mov 5, %o5
-; CHECK-NEXT: call foo7
-; CHECK-NEXT: st %i0, [%sp+92]
-; CHECK-NEXT: ret
-; CHECK-NEXT: restore %g0, %o0, %o0
+; V8-LABEL: caller_args:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: save %sp, -104, %sp
+; V8-NEXT: mov 6, %i0
+; V8-NEXT: mov %g0, %o0
+; V8-NEXT: mov 1, %o1
+; V8-NEXT: mov 2, %o2
+; V8-NEXT: mov 3, %o3
+; V8-NEXT: mov 4, %o4
+; V8-NEXT: mov 5, %o5
+; V8-NEXT: call foo7
+; V8-NEXT: st %i0, [%sp+92]
+; V8-NEXT: ret
+; V8-NEXT: restore %g0, %o0, %o0
+;
+; V9-LABEL: caller_args:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: save %sp, -192, %sp
+; V9-NEXT: mov 6, %i0
+; V9-NEXT: mov 0, %o0
+; V9-NEXT: mov 1, %o1
+; V9-NEXT: mov 2, %o2
+; V9-NEXT: mov 3, %o3
+; V9-NEXT: mov 4, %o4
+; V9-NEXT: mov 5, %o5
+; V9-NEXT: call foo7
+; V9-NEXT: stx %i0, [%sp+2223]
+; V9-NEXT: ret
+; V9-NEXT: restore %g0, %o0, %o0
entry:
%r = tail call i32 @foo7(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
ret i32 %r
@@ -123,15 +192,23 @@ entry:
; byval parameters.
define i32 @caller_byval() #0 {
-; CHECK-LABEL: caller_byval:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: save %sp, -104, %sp
-; CHECK-NEXT: ld [%fp+-4], %i0
-; CHECK-NEXT: st %i0, [%fp+-8]
-; CHECK-NEXT: call callee_byval
-; CHECK-NEXT: add %fp, -8, %o0
-; CHECK-NEXT: ret
-; CHECK-NEXT: restore %g0, %o0, %o0
+; V8-LABEL: caller_byval:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: save %sp, -104, %sp
+; V8-NEXT: ld [%fp+-4], %i0
+; V8-NEXT: st %i0, [%fp+-8]
+; V8-NEXT: call callee_byval
+; V8-NEXT: add %fp, -8, %o0
+; V8-NEXT: ret
+; V8-NEXT: restore %g0, %o0, %o0
+;
+; V9-LABEL: caller_byval:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: save %sp, -192, %sp
+; V9-NEXT: call callee_byval
+; V9-NEXT: add %fp, 2039, %o0
+; V9-NEXT: ret
+; V9-NEXT: restore %g0, %o0, %o0
entry:
%a = alloca i32*
%r = tail call i32 @callee_byval(i32** byval(i32*) %a)
@@ -141,11 +218,17 @@ entry:
; Perform tail call optimization for sret function.
define void @sret_test(%struct.a* noalias sret(%struct.a) %agg.result) #0 {
-; CHECK-LABEL: sret_test:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: mov %o7, %g1
-; CHECK-NEXT: call sret_func
-; CHECK-NEXT: mov %g1, %o7
+; V8-LABEL: sret_test:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: mov %o7, %g1
+; V8-NEXT: call sret_func
+; V8-NEXT: mov %g1, %o7
+;
+; V9-LABEL: sret_test:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: mov %o7, %g1
+; V9-NEXT: call sret_func
+; V9-NEXT: mov %g1, %o7
entry:
tail call void bitcast (void (%struct.a*)* @sret_func to
void (%struct.a*)*)(%struct.a* sret(%struct.a) %agg.result)
@@ -157,17 +240,30 @@ entry:
; struct will generate a memcpy as the tail function.
define void @ret_large_struct(%struct.big* noalias sret(%struct.big) %agg.result) #0 {
-; CHECK-LABEL: ret_large_struct:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: save %sp, -96, %sp
-; CHECK-NEXT: ld [%fp+64], %i0
-; CHECK-NEXT: sethi %hi(bigstruct), %i1
-; CHECK-NEXT: add %i1, %lo(bigstruct), %o1
-; CHECK-NEXT: mov 400, %o2
-; CHECK-NEXT: call memcpy
-; CHECK-NEXT: mov %i0, %o0
-; CHECK-NEXT: jmp %i7+12
-; CHECK-NEXT: restore
+; V8-LABEL: ret_large_struct:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: save %sp, -96, %sp
+; V8-NEXT: ld [%fp+64], %i0
+; V8-NEXT: sethi %hi(bigstruct), %i1
+; V8-NEXT: add %i1, %lo(bigstruct), %o1
+; V8-NEXT: mov 400, %o2
+; V8-NEXT: call memcpy
+; V8-NEXT: mov %i0, %o0
+; V8-NEXT: jmp %i7+12
+; V8-NEXT: restore
+;
+; V9-LABEL: ret_large_struct:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: save %sp, -176, %sp
+; V9-NEXT: sethi %h44(bigstruct), %i1
+; V9-NEXT: add %i1, %m44(bigstruct), %i1
+; V9-NEXT: sllx %i1, 12, %i1
+; V9-NEXT: add %i1, %l44(bigstruct), %o1
+; V9-NEXT: mov 400, %o2
+; V9-NEXT: call memcpy
+; V9-NEXT: mov %i0, %o0
+; V9-NEXT: ret
+; V9-NEXT: restore
entry:
%0 = bitcast %struct.big* %agg.result to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 bitcast (%struct.big* @bigstruct to i8*), i32 400, i1 false)
@@ -177,10 +273,17 @@ entry:
; Test register + immediate pattern.
define void @addri_test(i32 %ptr) #0 {
-; CHECK-LABEL: addri_test:
-; CHECK: ! %bb.0: ! %entry
-; CHECK-NEXT: jmp %o0+4
-; CHECK-NEXT: nop
+; V8-LABEL: addri_test:
+; V8: ! %bb.0: ! %entry
+; V8-NEXT: jmp %o0+4
+; V8-NEXT: nop
+;
+; V9-LABEL: addri_test:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: add %o0, 4, %o0
+; V9-NEXT: srl %o0, 0, %o0
+; V9-NEXT: jmp %o0
+; V9-NEXT: nop
entry:
%add = add nsw i32 %ptr, 4
%0 = inttoptr i32 %add to void ()*
More information about the llvm-commits
mailing list