[llvm] 7751a91 - [AArch64][FastISel] Handle call with multiple return regs
Alexis Engelke via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 27 02:59:39 PDT 2023
Author: Alexis Engelke
Date: 2023-04-27T11:59:33+02:00
New Revision: 7751a91465799d5ff0dc1df5c7d010b16598a0ec
URL: https://github.com/llvm/llvm-project/commit/7751a91465799d5ff0dc1df5c7d010b16598a0ec
DIFF: https://github.com/llvm/llvm-project/commit/7751a91465799d5ff0dc1df5c7d010b16598a0ec.diff
LOG: [AArch64][FastISel] Handle call with multiple return regs
The code closely follows the X86 back-end. Applications that make heavy
use of {i64, i64} returns, which occupy two registers, benefit strongly
from the reduced number of SelectionDAG fallbacks.
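
For context, a call of this shape previously forced a SelectionDAG
fallback and should now be lowered directly by FastISel (an illustrative
sketch, not part of the patch; the callee is hypothetical, and under
AAPCS64 the two members come back in x0 and d0):

    declare { i64, double } @callee()

    define i64 @caller() {
      ; FastISel now copies each returned value out of its physreg into
      ; its own virtual register instead of bailing out to SelectionDAG.
      %r = call { i64, double } @callee()
      %i = extractvalue { i64, double } %r, 0
      ret i64 %i
    }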
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D148346
Added:
llvm/test/CodeGen/AArch64/fast-isel-call-struct-return-fallback.ll
llvm/test/CodeGen/AArch64/fast-isel-call-struct-return.ll
Modified:
llvm/lib/Target/AArch64/AArch64FastISel.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 8575aacf78ff7..c4d3bf33148a2 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -272,7 +272,7 @@ class AArch64FastISel final : public FastISel {
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
unsigned &NumBytes);
- bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
+ bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
public:
// Backend specific FastISel code.
@@ -3102,8 +3102,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
return true;
}
-bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
- unsigned NumBytes) {
+bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
CallingConv::ID CC = CLI.CallConv;
// Issue CALLSEQ_END
@@ -3111,33 +3110,31 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(0);
- // Now the return value.
- if (RetVT != MVT::isVoid) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
- CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
+ // Now the return values.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
- // Only handle a single return value.
- if (RVLocs.size() != 1)
- return false;
-
- // Copy all of the result registers out of their specified physreg.
- MVT CopyVT = RVLocs[0].getValVT();
+ Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ MVT CopyVT = VA.getValVT();
+ unsigned CopyReg = ResultReg + i;
// TODO: Handle big-endian results
if (CopyVT.isVector() && !Subtarget->isLittleEndian())
return false;
- Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(RVLocs[0].getLocReg());
- CLI.InRegs.push_back(RVLocs[0].getLocReg());
-
- CLI.ResultReg = ResultReg;
- CLI.NumResultRegs = 1;
+ // Copy the result out of its specified physreg.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ CopyReg)
+ .addReg(VA.getLocReg());
+ CLI.InRegs.push_back(VA.getLocReg());
}
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = RVLocs.size();
+
return true;
}
@@ -3185,13 +3182,6 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (IsVarArg)
return false;
- // FIXME: Only handle *simple* calls for now.
- MVT RetVT;
- if (CLI.RetTy->isVoidTy())
- RetVT = MVT::isVoid;
- else if (!isTypeLegal(CLI.RetTy, RetVT))
- return false;
-
for (auto Flag : CLI.OutFlags)
if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
@@ -3287,7 +3277,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CLI.Call = MIB;
// Finish off the call including any return values.
- return finishCall(CLI, RetVT, NumBytes);
+ return finishCall(CLI, NumBytes);
}
bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return-fallback.ll b/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return-fallback.ll
new file mode 100644
index 0000000000000..52d8eb35da7b0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return-fallback.ll
@@ -0,0 +1,14 @@
+; RUN: llc -fast-isel -pass-remarks-missed=isel < %s 2>&1 >/dev/null | FileCheck -check-prefix=STDERR -allow-empty %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnu"
+
+declare { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s10i64()
+
+define i64 @call_ret_s10i64() {
+; STDERR: FastISel missed call: %ret = call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s10i64() (in function: call_ret_s10i64)
+ %ret = call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s10i64()
+ %ext0 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %ret, 0
+ %ext1 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %ret, 1
+ %sum = add i64 %ext0, %ext1
+ ret i64 %sum
+}
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return.ll b/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return.ll
new file mode 100644
index 0000000000000..6b43bdfa276ec
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no-generate-body-for-unused-prefixes
+; RUN: llc -fast-isel -fast-isel-abort=3 < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnu"
+
+declare { i64, i64 } @ret_s2i64()
+
+define i64 @call_ret_s2i64() {
+; CHECK-LABEL: call_ret_s2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl ret_s2i64
+; CHECK-NEXT: add x0, x0, x1
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %ret = call { i64, i64 } @ret_s2i64()
+ %ext0 = extractvalue { i64, i64 } %ret, 0
+ %ext1 = extractvalue { i64, i64 } %ret, 1
+ %sum = add i64 %ext0, %ext1
+ ret i64 %sum
+}
+
+declare { i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s8i64()
+
+define i64 @call_ret_s8i64() {
+; CHECK-LABEL: call_ret_s8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl ret_s8i64
+; CHECK-NEXT: add x0, x0, x7
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %ret = call { i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s8i64()
+ %ext0 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } %ret, 0
+ %ext7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } %ret, 7
+ %sum = add i64 %ext0, %ext7
+ ret i64 %sum
+}
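
One case still falls back, per the TODO in finishCall: vector results on
big-endian targets. An illustrative sketch of a call that would be
rejected under an aarch64_be triple (the callee name is hypothetical):

    declare { <2 x i64>, <2 x i64> } @ret_vec_pair()

    define <2 x i64> @call_ret_vec_pair() {
      ; On aarch64_be the vector CopyVT makes finishCall return false, so
      ; this call falls back to SelectionDAG; little-endian is handled.
      %r = call { <2 x i64>, <2 x i64> } @ret_vec_pair()
      %v = extractvalue { <2 x i64>, <2 x i64> } %r, 0
      ret <2 x i64> %v
    }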