[llvm] 7751a91 - [AArch64][FastISel] Handle call with multiple return regs

Alexis Engelke via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 27 02:59:39 PDT 2023


Author: Alexis Engelke
Date: 2023-04-27T11:59:33+02:00
New Revision: 7751a91465799d5ff0dc1df5c7d010b16598a0ec

URL: https://github.com/llvm/llvm-project/commit/7751a91465799d5ff0dc1df5c7d010b16598a0ec
DIFF: https://github.com/llvm/llvm-project/commit/7751a91465799d5ff0dc1df5c7d010b16598a0ec.diff

LOG: [AArch64][FastISel] Handle call with multiple return regs

The code closely follows the X86 back-end. Applications that make heavy
use of {i64, i64} return values (which occupy two registers) benefit
strongly from the reduced number of SelectionDAG fallbacks.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D148346

Added: 
    llvm/test/CodeGen/AArch64/fast-isel-call-struct-return-fallback.ll
    llvm/test/CodeGen/AArch64/fast-isel-call-struct-return.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64FastISel.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 8575aacf78ff7..c4d3bf33148a2 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -272,7 +272,7 @@ class AArch64FastISel final : public FastISel {
   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                        unsigned &NumBytes);
-  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
+  bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
 
 public:
   // Backend specific FastISel code.
@@ -3102,8 +3102,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
   return true;
 }
 
-bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
-                                 unsigned NumBytes) {
+bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
   CallingConv::ID CC = CLI.CallConv;
 
   // Issue CALLSEQ_END
@@ -3111,33 +3110,31 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
     .addImm(NumBytes).addImm(0);
 
-  // Now the return value.
-  if (RetVT != MVT::isVoid) {
-    SmallVector<CCValAssign, 16> RVLocs;
-    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
-    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
+  // Now the return values.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
+  CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
 
-    // Only handle a single return value.
-    if (RVLocs.size() != 1)
-      return false;
-
-    // Copy all of the result registers out of their specified physreg.
-    MVT CopyVT = RVLocs[0].getValVT();
+  Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    MVT CopyVT = VA.getValVT();
+    unsigned CopyReg = ResultReg + i;
 
     // TODO: Handle big-endian results
     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
       return false;
 
-    Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
-            TII.get(TargetOpcode::COPY), ResultReg)
-        .addReg(RVLocs[0].getLocReg());
-    CLI.InRegs.push_back(RVLocs[0].getLocReg());
-
-    CLI.ResultReg = ResultReg;
-    CLI.NumResultRegs = 1;
+    // Copy result out of their specified physreg.
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+            CopyReg)
+        .addReg(VA.getLocReg());
+    CLI.InRegs.push_back(VA.getLocReg());
   }
 
+  CLI.ResultReg = ResultReg;
+  CLI.NumResultRegs = RVLocs.size();
+
   return true;
 }
 
@@ -3185,13 +3182,6 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   if (IsVarArg)
     return false;
 
-  // FIXME: Only handle *simple* calls for now.
-  MVT RetVT;
-  if (CLI.RetTy->isVoidTy())
-    RetVT = MVT::isVoid;
-  else if (!isTypeLegal(CLI.RetTy, RetVT))
-    return false;
-
   for (auto Flag : CLI.OutFlags)
     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
         Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
@@ -3287,7 +3277,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   CLI.Call = MIB;
 
   // Finish off the call including any return values.
-  return finishCall(CLI, RetVT, NumBytes);
+  return finishCall(CLI, NumBytes);
 }
 
 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {

diff --git a/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return-fallback.ll b/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return-fallback.ll
new file mode 100644
index 0000000000000..52d8eb35da7b0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return-fallback.ll
@@ -0,0 +1,14 @@
+; RUN: llc -fast-isel -pass-remarks-missed=isel < %s 2>&1 >/dev/null | FileCheck -check-prefix=STDERR -allow-empty %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnu"
+
+declare { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s10i64()
+
+define i64 @call_ret_s10i64() {
+; STDERR: FastISel missed call:   %ret = call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s10i64() (in function: call_ret_s10i64)
+  %ret = call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s10i64()
+  %ext0 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %ret, 0
+  %ext1 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %ret, 1
+  %sum = add i64 %ext0, %ext1
+  ret i64 %sum
+}

diff  --git a/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return.ll b/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return.ll
new file mode 100644
index 0000000000000..6b43bdfa276ec
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-call-struct-return.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no-generate-body-for-unused-prefixes
+; RUN: llc -fast-isel -fast-isel-abort=3 < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnu"
+
+declare { i64, i64 } @ret_s2i64()
+
+define i64 @call_ret_s2i64() {
+; CHECK-LABEL: call_ret_s2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl ret_s2i64
+; CHECK-NEXT:    add x0, x0, x1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %ret = call { i64, i64 } @ret_s2i64()
+  %ext0 = extractvalue { i64, i64 } %ret, 0
+  %ext1 = extractvalue { i64, i64 } %ret, 1
+  %sum = add i64 %ext0, %ext1
+  ret i64 %sum
+}
+
+declare { i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s8i64()
+
+define i64 @call_ret_s8i64() {
+; CHECK-LABEL: call_ret_s8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl ret_s8i64
+; CHECK-NEXT:    add x0, x0, x7
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %ret = call { i64, i64, i64, i64, i64, i64, i64, i64 } @ret_s8i64()
+  %ext0 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } %ret, 0
+  %ext7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64 } %ret, 7
+  %sum = add i64 %ext0, %ext7
+  ret i64 %sum
+}


        


More information about the llvm-commits mailing list