[clang] [llvm] [AArch64] Support preserve_none calling convention (PR #91046)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 6 17:19:17 PDT 2024


https://github.com/antangelo updated https://github.com/llvm/llvm-project/pull/91046

>From 767173a0dfde9858c90867cc5d476da90e5ba898 Mon Sep 17 00:00:00 2001
From: Antonio Abbatangelo <contact at antangelo.com>
Date: Tue, 30 Apr 2024 22:58:18 -0400
Subject: [PATCH 1/5] [AArch64] Support preserve_none calling convention

---
 clang/include/clang/Basic/Attr.td             |   3 +-
 clang/include/clang/Basic/AttrDocs.td         |  19 +-
 clang/lib/Basic/Targets/AArch64.cpp           |   1 +
 clang/test/CodeGen/preserve-call-conv.c       |   6 +-
 llvm/docs/LangRef.rst                         |   2 +-
 .../Target/AArch64/AArch64CallingConvention.h |   3 +
 .../AArch64/AArch64CallingConvention.td       |  27 ++
 .../Target/AArch64/AArch64ISelLowering.cpp    |  34 +-
 .../Target/AArch64/AArch64RegisterInfo.cpp    |  12 +-
 .../AArch64/GISel/AArch64CallLowering.cpp     |   1 +
 .../AArch64/dynamic-regmask-preserve-none.ll  |  88 +++++
 llvm/test/CodeGen/AArch64/preserve.ll         |   9 +-
 llvm/test/CodeGen/AArch64/preserve_nonecc.ll  |  92 +++++
 .../CodeGen/AArch64/preserve_nonecc_call.ll   | 325 ++++++++++++++++++
 .../AArch64/preserve_nonecc_musttail.ll       |  11 +
 .../CodeGen/AArch64/preserve_nonecc_swift.ll  |  16 +
 16 files changed, 631 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/dynamic-regmask-preserve-none.ll
 create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc.ll
 create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
 create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc_musttail.ll
 create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc_swift.ll

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 0225598cbbe8ad..712c79927304e2 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -3038,7 +3038,8 @@ def M68kRTD: DeclOrTypeAttr {
   let Documentation = [M68kRTDDocs];
 }
 
-def PreserveNone : DeclOrTypeAttr, TargetSpecificAttr<TargetAnyX86> {
+def PreserveNone : DeclOrTypeAttr,
+                   TargetSpecificAttr<TargetArch<!listconcat(TargetAArch64.Arches, TargetAnyX86.Arches)>> {
   let Spellings = [Clang<"preserve_none">];
   let Subjects = SubjectList<[FunctionLike]>;
   let Documentation = [PreserveNoneDocs];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index f8253143b596c0..d23465b77e7edd 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -5658,17 +5658,20 @@ experimental at this time.
 def PreserveNoneDocs : Documentation {
   let Category = DocCatCallingConvs;
   let Content = [{
-On X86-64 target, this attribute changes the calling convention of a function.
+On X86-64 and AArch64 targets, this attribute changes the calling convention of a function.
 The ``preserve_none`` calling convention tries to preserve as few general
 registers as possible. So all general registers are caller saved registers. It
 also uses more general registers to pass arguments. This attribute doesn't
-impact floating-point registers (XMMs/YMMs). Floating-point registers still
-follow the c calling convention.
-
-- Only RSP and RBP are preserved by callee.
-
-- Register RDI, RSI, RDX, RCX, R8, R9, R11, R12, R13, R14, R15 and RAX now can
-  be used to pass function arguments.
+impact floating-point registers. 
+
+- On X86-64, only RSP and RBP are preserved by the callee.
+  Registers RDI, RSI, RDX, RCX, R8, R9, R11, R12, R13, R14, R15 and RAX now can
+  be used to pass function arguments. Floating-point registers (XMMs/YMMs) still
+  follow the C calling convention.
+- On AArch64, only LR and FP are preserved by the callee.
+  Registers X19-X28 and X0-X17 are used to pass function arguments.
+  X18, SIMD and floating-point registers follow the AAPCS calling
+  convention.
   }];
 }
 
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index c8d243a8fb7aea..e1f7dbf1d9f20b 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1202,6 +1202,7 @@ AArch64TargetInfo::checkCallingConvention(CallingConv CC) const {
   case CC_SwiftAsync:
   case CC_PreserveMost:
   case CC_PreserveAll:
+  case CC_PreserveNone:
   case CC_OpenCLKernel:
   case CC_AArch64VectorCall:
   case CC_AArch64SVEPCS:
diff --git a/clang/test/CodeGen/preserve-call-conv.c b/clang/test/CodeGen/preserve-call-conv.c
index 74bf695e6f331d..65973206403f70 100644
--- a/clang/test/CodeGen/preserve-call-conv.c
+++ b/clang/test/CodeGen/preserve-call-conv.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,X86-LINUX
-// RUN: %clang_cc1 -triple arm64-unknown-unknown -emit-llvm < %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,LINUX
+// RUN: %clang_cc1 -triple arm64-unknown-unknown -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,LINUX
 
 // RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-unknown-windows-msvc -emit-llvm %s -o - | FileCheck %s
@@ -23,5 +23,5 @@ void boo(void) __attribute__((preserve_all)) {
 // is lowered to the corresponding calling convention attrribute at the LLVM IR
 // level.
 void bar(void) __attribute__((preserve_none)) {
-  // X86-LINUX-LABEL: define {{(dso_local )?}}preserve_nonecc void @bar()
+  // LINUX-LABEL: define {{(dso_local )?}}preserve_nonecc void @bar()
 }
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2077fdd841fcd6..1259cc568204f9 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -458,7 +458,7 @@ added in the future:
     registers to pass arguments. This attribute doesn't impact non-general
     purpose registers (e.g. floating point registers, on X86 XMMs/YMMs).
     Non-general purpose registers still follow the standard c calling
-    convention. Currently it is for x86_64 only.
+    convention. Currently it is for x86_64 and AArch64 only.
 "``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions
     Clang generates an access function to access C++-style TLS. The access
     function generally has an entry block, an exit block and an initialization
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index 3b51ee12b7477e..63185a97cba03d 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -52,6 +52,9 @@ bool CC_AArch64_Arm64EC_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT,
 bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                     CCState &State);
+bool CC_AArch64_Preserve_None(unsigned ValNo, MVT ValVT, MVT LocVT,
+                              CCValAssign::LocInfo LocInfo,
+                              ISD::ArgFlagsTy ArgFlags, CCState &State);
 bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                          CCState &State);
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 8e67f0f5c8815f..9eee2b65c28fdc 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -494,6 +494,29 @@ def CC_AArch64_GHC : CallingConv<[
   CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
 ]>;
 
+let Entry = 1 in
+def CC_AArch64_Preserve_None : CallingConv<[
+    // We only preserve:
+    // - X18, which is used for the 'nest' parameter.
+    // - X29, the frame pointer
+    // - X30, the link register
+    // All other registers can be used to pass arguments.
+    // Non-volatile registers are used first, so functions may call
+    // normal functions without saving and reloading arguments.
+    CCIfType<[i32], CCAssignToReg<[W19, W20, W21, W22, W23,
+                                   W24, W25, W26, W27, W28,
+                                   W0, W1, W2, W3, W4, W5,
+                                   W6, W7, W8, W9, W10, W11,
+                                   W12, W13, W14, W15, W16, W17]>>,
+    CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23,
+                                   X24, X25, X26, X27, X28,
+                                   X0, X1, X2, X3, X4, X5,
+                                   X6, X7, X8, X9, X10, X11,
+                                   X12, X13, X14, X15, X16, X17]>>,
+
+    CCDelegateTo<CC_AArch64_AAPCS>
+]>;
+
 // The order of the callee-saves in this file is important, because the
 // FrameLowering code will use this order to determine the layout the
 // callee-save area in the stack frame. As can be observed below, Darwin
@@ -606,6 +629,8 @@ def CSR_AArch64_AllRegs
 
 def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;
 
+def CSR_AArch64_NoneRegs : CalleeSavedRegs<(add LR, FP)>;
+
 def CSR_AArch64_RT_MostRegs :  CalleeSavedRegs<(add CSR_AArch64_AAPCS,
                                                 (sequence "X%u", 9, 15))>;
 
@@ -681,6 +706,8 @@ def CSR_Darwin_AArch64_RT_AllRegs
 // These all preserve x18 in addition to any other registers.
 def CSR_AArch64_NoRegs_SCS
     : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>;
+def CSR_AArch64_NoneRegs_SCS
+    : CalleeSavedRegs<(add CSR_AArch64_NoneRegs, X18)>;
 def CSR_AArch64_AllRegs_SCS
     : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>;
 def CSR_AArch64_AAPCS_SwiftError_SCS
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b27d204f3dded0..4e69d6c7f7ca95 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6825,6 +6825,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
     report_fatal_error("Unsupported calling convention.");
   case CallingConv::GHC:
     return CC_AArch64_GHC;
+  case CallingConv::PreserveNone:
+    return CC_AArch64_Preserve_None;
   case CallingConv::C:
   case CallingConv::Fast:
   case CallingConv::PreserveMost:
@@ -7348,6 +7350,20 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
     FuncInfo->setLazySaveTPIDR2Obj(TPIDR2Obj);
   }
 
+  if (CallConv == CallingConv::PreserveNone) {
+    for (const ISD::InputArg &I : Ins) {
+      if (I.Flags.isSwiftSelf() || I.Flags.isSwiftError() ||
+          I.Flags.isSwiftAsync()) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
+            MF.getFunction(),
+            "Swift attributes can't be used with preserve_none",
+            DL.getDebugLoc()));
+        break;
+      }
+    }
+  }
+
   return Chain;
 }
 
@@ -7519,6 +7535,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
   case CallingConv::AArch64_SVE_VectorCall:
   case CallingConv::PreserveMost:
   case CallingConv::PreserveAll:
+  case CallingConv::PreserveNone:
   case CallingConv::Swift:
   case CallingConv::SwiftTail:
   case CallingConv::Tail:
@@ -7949,9 +7966,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
       ++NumTailCalls;
   }
 
-  if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
+  if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
     report_fatal_error("failed to perform tail call elimination on a call "
                        "site marked musttail");
+  }
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getStackSize();
@@ -8576,6 +8594,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     }
   }
 
+  if (CallConv == CallingConv::PreserveNone) {
+    for (const ISD::OutputArg &O : Outs) {
+      if (O.Flags.isSwiftSelf() || O.Flags.isSwiftError() ||
+          O.Flags.isSwiftAsync()) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
+            MF.getFunction(),
+            "Swift attributes can't be used with preserve_none",
+            DL.getDebugLoc()));
+        break;
+      }
+    }
+  }
+
   return Result;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ad29003f1e8173..570ea28646b628 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -75,6 +75,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     // GHC set of callee saved regs is empty as all those regs are
     // used for passing STG regs around
     return CSR_AArch64_NoRegs_SaveList;
+  if (MF->getFunction().getCallingConv() == CallingConv::PreserveNone)
+    return CSR_AArch64_NoneRegs_SaveList;
   if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
     return CSR_AArch64_AllRegs_SaveList;
 
@@ -264,6 +266,9 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
   if (CC == CallingConv::GHC)
     // This is academic because all GHC calls are (supposed to be) tail calls
     return SCS ? CSR_AArch64_NoRegs_SCS_RegMask : CSR_AArch64_NoRegs_RegMask;
+  if (CC == CallingConv::PreserveNone)
+    return SCS ? CSR_AArch64_NoneRegs_SCS_RegMask
+               : CSR_AArch64_NoneRegs_RegMask;
   if (CC == CallingConv::AnyReg)
     return SCS ? CSR_AArch64_AllRegs_SCS_RegMask : CSR_AArch64_AllRegs_RegMask;
 
@@ -298,12 +303,11 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
   if (CC == CallingConv::PreserveMost)
     return SCS ? CSR_AArch64_RT_MostRegs_SCS_RegMask
                : CSR_AArch64_RT_MostRegs_RegMask;
-  else if (CC == CallingConv::PreserveAll)
+  if (CC == CallingConv::PreserveAll)
     return SCS ? CSR_AArch64_RT_AllRegs_SCS_RegMask
                : CSR_AArch64_RT_AllRegs_RegMask;
 
-  else
-    return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
+  return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
 }
 
 const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask(
@@ -588,6 +592,8 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
     report_fatal_error("Unsupported calling convention.");
   case CallingConv::GHC:
     return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
+  case CallingConv::PreserveNone:
+    return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
   case CallingConv::C:
   case CallingConv::Fast:
   case CallingConv::PreserveMost:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index c4197ff73187af..2615ea7f81653b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -782,6 +782,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
   case CallingConv::C:
   case CallingConv::PreserveMost:
   case CallingConv::PreserveAll:
+  case CallingConv::PreserveNone:
   case CallingConv::Swift:
   case CallingConv::SwiftTail:
   case CallingConv::Tail:
diff --git a/llvm/test/CodeGen/AArch64/dynamic-regmask-preserve-none.ll b/llvm/test/CodeGen/AArch64/dynamic-regmask-preserve-none.ll
new file mode 100644
index 00000000000000..2d4fefe82b9911
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dynamic-regmask-preserve-none.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -stop-after finalize-isel <%s | FileCheck %s
+
+; Check that the callee doesn't have calleeSavedRegisters.
+define preserve_nonecc i64 @callee1(i64 %a0, i64 %b0, i64 %c0, i64 %d0, i64 %e0) nounwind {
+  %a1 = mul i64 %a0, %b0
+  %a2 = mul i64 %a1, %c0
+  %a3 = mul i64 %a2, %d0
+  %a4 = mul i64 %a3, %e0
+  ret i64 %a4
+}
+; CHECK:     name: callee1
+; CHECK-NOT: calleeSavedRegisters:
+; CHECK:     RET_ReallyLR implicit $x0
+
+; Check that RegMask is csr_aarch64_noneregs.
+define i64 @caller1(i64 %a0) nounwind {
+  %b1 = call preserve_nonecc i64 @callee1(i64 %a0, i64 %a0, i64 %a0, i64 %a0, i64 %a0)
+  %b2 = add i64 %b1, %a0
+  ret i64 %b2
+}
+; CHECK:    name: caller1
+; CHECK:    BL @callee1, csr_aarch64_noneregs
+; CHECK:    RET_ReallyLR implicit $x0
+
+
+; Check that the callee doesn't have calleeSavedRegisters.
+define preserve_nonecc {i64, i64} @callee2(i64 %a0, i64 %b0, i64 %c0, i64 %d0, i64 %e0) nounwind {
+  %a1 = mul i64 %a0, %b0
+  %a2 = mul i64 %a1, %c0
+  %a3 = mul i64 %a2, %d0
+  %a4 = mul i64 %a3, %e0
+  %b4 = insertvalue {i64, i64} undef, i64 %a3, 0
+  %b5 = insertvalue {i64, i64} %b4, i64 %a4, 1
+  ret {i64, i64} %b5
+}
+; CHECK:     name: callee2
+; CHECK-NOT: calleeSavedRegisters:
+; CHECK:     RET_ReallyLR implicit $x0
+
+
+; Check that RegMask is csr_aarch64_noneregs.
+define {i64, i64} @caller2(i64 %a0) nounwind {
+  %b1 = call preserve_nonecc {i64, i64} @callee2(i64 %a0, i64 %a0, i64 %a0, i64 %a0, i64 %a0)
+  ret {i64, i64} %b1
+}
+; CHECK:    name: caller2
+; CHECK:    BL @callee2, csr_aarch64_noneregs
+; CHECK:    RET_ReallyLR implicit $x0
+
+
+%struct.Large = type { i64, double, double }
+
+; Declare the callee with a sret parameter.
+declare preserve_nonecc void @callee3(ptr noalias nocapture writeonly sret(%struct.Large) align 4 %a0, i64 %b0) nounwind;
+
+; Check that RegMask is csr_aarch64_noneregs.
+define void @caller3(i64 %a0) nounwind {
+  %a1 = alloca %struct.Large, align 8
+  call preserve_nonecc void @callee3(ptr nonnull sret(%struct.Large) align 8 %a1, i64 %a0)
+  ret void
+}
+; CHECK:    name: caller3
+; CHECK:    BL @callee3, csr_aarch64_noneregs
+; CHECK:    RET_ReallyLR
+
+
+; Check that the callee doesn't have calleeSavedRegisters.
+define preserve_nonecc {i64, double} @callee4(i64 %a0, i64 %b0, i64 %c0, i64 %d0, i64 %e0) nounwind {
+  %a1 = mul i64 %a0, %b0
+  %a2 = mul i64 %a1, %c0
+  %a3 = mul i64 %a2, %d0
+  %a4 = mul i64 %a3, %e0
+  %b4 = insertvalue {i64, double} undef, i64 %a3, 0
+  %b5 = insertvalue {i64, double} %b4, double 1.2, 1
+  ret {i64, double} %b5
+}
+; CHECK:     name: callee4
+; CHECK-NOT: calleeSavedRegisters:
+; CHECK:     RET_ReallyLR implicit $x0, implicit $d0
+
+; Check that RegMask is csr_aarch64_noneregs.
+define {i64, double} @caller4(i64 %a0) nounwind {
+  %b1 = call preserve_nonecc {i64, double} @callee4(i64 %a0, i64 %a0, i64 %a0, i64 %a0, i64 %a0)
+  ret {i64, double} %b1
+}
+; CHECK:    name: caller4
+; CHECK:    BL @callee4, csr_aarch64_noneregs
+; CHECK:    RET_ReallyLR implicit $x0, implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/preserve.ll b/llvm/test/CodeGen/AArch64/preserve.ll
index d11a45144a9049..a8acdc1df97606 100644
--- a/llvm/test/CodeGen/AArch64/preserve.ll
+++ b/llvm/test/CodeGen/AArch64/preserve.ll
@@ -15,8 +15,15 @@ define preserve_allcc void @foo() #0 {
   call void @bar2()
   ret void
 }
+define preserve_nonecc void @qux() #0 {
+;  CHECK: qux Clobbered Registers: $ffr $fpcr $fpsr $nzcv $sp $vg $wsp $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10 $w11 $w12 $w13 $w14 $w15 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x10 $x11 $x12 $x13 $x14 $x15 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 
$d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 
$x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15
+
+    call void @bar1()
+    call void @bar2()
+    ret void
+}
 declare void @bar2()
 
-@llvm.used = appending global [2 x ptr] [ptr @foo, ptr @baz]
+@llvm.used = appending global [3 x ptr] [ptr @foo, ptr @baz, ptr @qux]
 
 attributes #0 = {nounwind}
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc.ll
new file mode 100644
index 00000000000000..d3caa8311d4e29
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc.ll
@@ -0,0 +1,92 @@
+; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=aarch64-apple-darwin | FileCheck --check-prefixes=ALL %s
+; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=aarch64-apple-darwin | FileCheck --check-prefixes=ALL %s
+; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=aarch64-apple-darwin | FileCheck --check-prefixes=ALL %s
+; RUN: sed -e "s/RETTYPE/double/;s/RETVAL/0./" %s | llc -mtriple=aarch64-apple-darwin | FileCheck --check-prefixes=ALL,DOUBLE %s
+
+; We don't need to save registers before using them inside a preserve_none function.
+define preserve_nonecc RETTYPE @preserve_nonecc1(i64, i64, double, double) nounwind {
+entry:
+;ALL-LABEL:   preserve_nonecc1
+;ALL:         ; %bb.0:
+;ALL-NEXT:    InlineAsm Start
+;ALL-NEXT:    InlineAsm End
+;DOUBLE-NEXT: movi d0, #0000000000000000
+;ALL-NEXT:    ret
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16}"()
+  ret RETTYPE RETVAL
+}
+
+; When calling a preserve_none function, all live registers must be saved and
+; restored around the function call.
+declare preserve_nonecc RETTYPE @bar(i64, i64, double, double)
+define void @preserve_nonecc2() nounwind {
+entry:
+;ALL-LABEL: preserve_nonecc2
+;ALL:       InlineAsm Start
+;ALL:       stp x9, x8
+;ALL:       stp x11, x10
+;ALL:       stp x13, x12
+;ALL:       stp x15, x14
+;ALL:       stp x17, x16
+;ALL:       stp x20, x19
+;ALL:       stp x22, x21
+;ALL:       stp x24, x23
+;ALL:       stp x26, x25
+;ALL:       stp x28, x27
+;ALL:       stp d8, d7
+;ALL:       stp d10, d9
+;ALL:       stp d12, d11
+;ALL:       stp d14, d13
+;ALL:       stp d16, d15
+;ALL:       ldp x20, x19
+;ALL:       ldp x22, x21
+;ALL:       ldp x24, x23
+;ALL:       ldp x26, x25
+;ALL:       ldp x28, x27
+;ALL:       ldp d8, d7
+;ALL:       ldp d10, d9
+;ALL:       ldp d12, d11
+;ALL:       ldp d14, d13
+;ALL:       ldp d16, d15
+;ALL:       ldp x9, x8
+;ALL:       ldp x11, x10
+;ALL:       ldp x13, x12
+;ALL:       ldp x15, x14
+;ALL:       ldp x17, x16
+;ALL:       InlineAsm Start
+  %a0 = call i64 asm sideeffect "", "={x8}"() nounwind
+  %a1 = call i64 asm sideeffect "", "={x9}"() nounwind
+  %a2 = call i64 asm sideeffect "", "={x10}"() nounwind
+  %a3 = call i64 asm sideeffect "", "={x11}"() nounwind
+  %a4 = call i64 asm sideeffect "", "={x12}"() nounwind
+  %a5 = call i64 asm sideeffect "", "={x13}"() nounwind
+  %a6 = call i64 asm sideeffect "", "={x14}"() nounwind
+  %a7 = call i64 asm sideeffect "", "={x15}"() nounwind
+  %a8 = call i64 asm sideeffect "", "={x16}"() nounwind
+  %a9 = call i64 asm sideeffect "", "={x17}"() nounwind
+  %a10 = call i64 asm sideeffect "", "={x19}"() nounwind
+  %a11 = call i64 asm sideeffect "", "={x20}"() nounwind
+  %a12 = call i64 asm sideeffect "", "={x21}"() nounwind
+  %a13 = call i64 asm sideeffect "", "={x22}"() nounwind
+  %a14 = call i64 asm sideeffect "", "={x23}"() nounwind
+  %a15 = call i64 asm sideeffect "", "={x24}"() nounwind
+  %a16 = call i64 asm sideeffect "", "={x25}"() nounwind
+  %a17 = call i64 asm sideeffect "", "={x26}"() nounwind
+  %a18 = call i64 asm sideeffect "", "={x27}"() nounwind
+  %a19 = call i64 asm sideeffect "", "={x28}"() nounwind
+
+  %f0 = call <1 x double> asm sideeffect "", "={d7}"() nounwind
+  %f1 = call <1 x double> asm sideeffect "", "={d8}"() nounwind
+  %f2 = call <1 x double> asm sideeffect "", "={d9}"() nounwind
+  %f3 = call <1 x double> asm sideeffect "", "={d10}"() nounwind
+  %f4 = call <1 x double> asm sideeffect "", "={d11}"() nounwind
+  %f5 = call <1 x double> asm sideeffect "", "={d12}"() nounwind
+  %f6 = call <1 x double> asm sideeffect "", "={d13}"() nounwind
+  %f7 = call <1 x double> asm sideeffect "", "={d14}"() nounwind
+  %f8 = call <1 x double> asm sideeffect "", "={d15}"() nounwind
+  %f9 = call <1 x double> asm sideeffect "", "={d16}"() nounwind
+
+  call preserve_nonecc RETTYPE @bar(i64 1, i64 2, double 3.0, double 4.0)
+  call void asm sideeffect "", "{x8},{x9},{x10},{x11},{x12},{x13},{x14},{x15},{x16},{x17},{x19},{x20},{x21},{x22},{x23},{x24},{x25},{x26},{x27},{x28},{d7},{d8},{d9},{d10},{d11},{d12},{d13},{d14},{d15},{d16}"(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, <1 x double> %f0, <1 x double> %f1, <1 x double> %f2, <1 x double> %f3, <1 x double> %f4, <1 x double> %f5, <1 x double> %f6, <1 x double> %f7, <1 x double> %f8, <1 x double> %f9)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
new file mode 100644
index 00000000000000..a8cb8c0947121f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
@@ -0,0 +1,325 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck --check-prefixes=CHECK %s
+; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefixes=DARWIN %s
+
+; This test checks various function call behaviors between preserve_none and
+; normal calling conventions.
+
+declare preserve_nonecc void @callee(ptr)
+
+; A normal caller calls a preserve_none callee. No tail call is generated
+; because the calling conventions are incompatible. Callee-saved registers are
+; saved/restored around the call.
+define void @caller1(ptr %a) {
+; CHECK-LABEL: caller1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp d15, d14, [sp, #-160]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 160
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w23, -40
+; CHECK-NEXT:    .cfi_offset w24, -48
+; CHECK-NEXT:    .cfi_offset w25, -56
+; CHECK-NEXT:    .cfi_offset w26, -64
+; CHECK-NEXT:    .cfi_offset w27, -72
+; CHECK-NEXT:    .cfi_offset w28, -80
+; CHECK-NEXT:    .cfi_offset w30, -96
+; CHECK-NEXT:    .cfi_offset b8, -104
+; CHECK-NEXT:    .cfi_offset b9, -112
+; CHECK-NEXT:    .cfi_offset b10, -120
+; CHECK-NEXT:    .cfi_offset b11, -128
+; CHECK-NEXT:    .cfi_offset b12, -136
+; CHECK-NEXT:    .cfi_offset b13, -144
+; CHECK-NEXT:    .cfi_offset b14, -152
+; CHECK-NEXT:    .cfi_offset b15, -160
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    ldp x20, x19, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #160 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; DARWIN-LABEL: caller1:
+; DARWIN:       ; %bb.0:
+; DARWIN-NEXT:    stp d15, d14, [sp, #-160]! ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x26, x25, [sp, #80] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x24, x23, [sp, #96] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x22, x21, [sp, #112] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x20, x19, [sp, #128] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x29, x30, [sp, #144] ; 16-byte Folded Spill
+; DARWIN-NEXT:    .cfi_def_cfa_offset 160
+; DARWIN-NEXT:    .cfi_offset w30, -8
+; DARWIN-NEXT:    .cfi_offset w29, -16
+; DARWIN-NEXT:    .cfi_offset w19, -24
+; DARWIN-NEXT:    .cfi_offset w20, -32
+; DARWIN-NEXT:    .cfi_offset w21, -40
+; DARWIN-NEXT:    .cfi_offset w22, -48
+; DARWIN-NEXT:    .cfi_offset w23, -56
+; DARWIN-NEXT:    .cfi_offset w24, -64
+; DARWIN-NEXT:    .cfi_offset w25, -72
+; DARWIN-NEXT:    .cfi_offset w26, -80
+; DARWIN-NEXT:    .cfi_offset w27, -88
+; DARWIN-NEXT:    .cfi_offset w28, -96
+; DARWIN-NEXT:    .cfi_offset b8, -104
+; DARWIN-NEXT:    .cfi_offset b9, -112
+; DARWIN-NEXT:    .cfi_offset b10, -120
+; DARWIN-NEXT:    .cfi_offset b11, -128
+; DARWIN-NEXT:    .cfi_offset b12, -136
+; DARWIN-NEXT:    .cfi_offset b13, -144
+; DARWIN-NEXT:    .cfi_offset b14, -152
+; DARWIN-NEXT:    .cfi_offset b15, -160
+; DARWIN-NEXT:    mov x19, x0
+; DARWIN-NEXT:    bl _callee
+; DARWIN-NEXT:    ldp x29, x30, [sp, #144] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x20, x19, [sp, #128] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x22, x21, [sp, #112] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x24, x23, [sp, #96] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x26, x25, [sp, #80] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp d15, d14, [sp], #160 ; 16-byte Folded Reload
+; DARWIN-NEXT:    ret
+  tail call preserve_nonecc void @callee(ptr %a)
+  ret void
+}
+
+; Preserve_none caller calls preserve_none callee. Same function body.
+; The tail call is preserved. No registers are saved/restored around the call.
+; In fact, a single branch (b) instruction is generated.
+define preserve_nonecc void @caller2(ptr %a) {
+; CHECK-LABEL: caller2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    b callee
+;
+; DARWIN-LABEL: caller2:
+; DARWIN:       ; %bb.0:
+; DARWIN-NEXT:    b _callee
+  tail call preserve_nonecc void @callee(ptr %a)
+  ret void
+}
+
+; Preserve_none function can use more registers to pass parameters.
+declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21)
+define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21) {
+; CHECK-LABEL: callee_with_many_param:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    mov x20, x21
+; CHECK-NEXT:    mov x21, x22
+; CHECK-NEXT:    mov x22, x23
+; CHECK-NEXT:    mov x23, x24
+; CHECK-NEXT:    mov x24, x25
+; CHECK-NEXT:    mov x25, x26
+; CHECK-NEXT:    mov x26, x27
+; CHECK-NEXT:    mov x27, x28
+; CHECK-NEXT:    mov x28, x0
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    mov x1, x2
+; CHECK-NEXT:    mov x2, x3
+; CHECK-NEXT:    mov x3, x4
+; CHECK-NEXT:    mov x4, x5
+; CHECK-NEXT:    mov x5, x6
+; CHECK-NEXT:    mov x6, x7
+; CHECK-NEXT:    mov x7, x8
+; CHECK-NEXT:    mov x8, x9
+; CHECK-NEXT:    mov x9, x10
+; CHECK-NEXT:    bl callee_with_many_param2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; DARWIN-LABEL: callee_with_many_param:
+; DARWIN:       ; %bb.0:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; DARWIN-NEXT:    .cfi_def_cfa_offset 16
+; DARWIN-NEXT:    .cfi_offset w30, -8
+; DARWIN-NEXT:    .cfi_offset w29, -16
+; DARWIN-NEXT:    mov x19, x20
+; DARWIN-NEXT:    mov x20, x21
+; DARWIN-NEXT:    mov x21, x22
+; DARWIN-NEXT:    mov x22, x23
+; DARWIN-NEXT:    mov x23, x24
+; DARWIN-NEXT:    mov x24, x25
+; DARWIN-NEXT:    mov x25, x26
+; DARWIN-NEXT:    mov x26, x27
+; DARWIN-NEXT:    mov x27, x28
+; DARWIN-NEXT:    mov x28, x0
+; DARWIN-NEXT:    mov x0, x1
+; DARWIN-NEXT:    mov x1, x2
+; DARWIN-NEXT:    mov x2, x3
+; DARWIN-NEXT:    mov x3, x4
+; DARWIN-NEXT:    mov x4, x5
+; DARWIN-NEXT:    mov x5, x6
+; DARWIN-NEXT:    mov x6, x7
+; DARWIN-NEXT:    mov x7, x8
+; DARWIN-NEXT:    mov x8, x9
+; DARWIN-NEXT:    mov x9, x10
+; DARWIN-NEXT:    bl _callee_with_many_param2
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; DARWIN-NEXT:    ret
+  %ret = call preserve_nonecc i64 @callee_with_many_param2(i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21)
+  ret i64 %ret
+}
+
+define i64 @caller3() {
+; CHECK-LABEL: caller3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp d15, d14, [sp, #-160]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 160
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w23, -40
+; CHECK-NEXT:    .cfi_offset w24, -48
+; CHECK-NEXT:    .cfi_offset w25, -56
+; CHECK-NEXT:    .cfi_offset w26, -64
+; CHECK-NEXT:    .cfi_offset w27, -72
+; CHECK-NEXT:    .cfi_offset w28, -80
+; CHECK-NEXT:    .cfi_offset w30, -96
+; CHECK-NEXT:    .cfi_offset b8, -104
+; CHECK-NEXT:    .cfi_offset b9, -112
+; CHECK-NEXT:    .cfi_offset b10, -120
+; CHECK-NEXT:    .cfi_offset b11, -128
+; CHECK-NEXT:    .cfi_offset b12, -136
+; CHECK-NEXT:    .cfi_offset b13, -144
+; CHECK-NEXT:    .cfi_offset b14, -152
+; CHECK-NEXT:    .cfi_offset b15, -160
+; CHECK-NEXT:    mov w19, #1 // =0x1
+; CHECK-NEXT:    mov w20, #2 // =0x2
+; CHECK-NEXT:    mov w21, #3 // =0x3
+; CHECK-NEXT:    mov w22, #4 // =0x4
+; CHECK-NEXT:    mov w23, #5 // =0x5
+; CHECK-NEXT:    mov w24, #6 // =0x6
+; CHECK-NEXT:    mov w25, #7 // =0x7
+; CHECK-NEXT:    mov w26, #8 // =0x8
+; CHECK-NEXT:    mov w27, #9 // =0x9
+; CHECK-NEXT:    mov w28, #10 // =0xa
+; CHECK-NEXT:    mov w0, #11 // =0xb
+; CHECK-NEXT:    mov w1, #12 // =0xc
+; CHECK-NEXT:    mov w2, #13 // =0xd
+; CHECK-NEXT:    mov w3, #14 // =0xe
+; CHECK-NEXT:    mov w4, #15 // =0xf
+; CHECK-NEXT:    mov w5, #16 // =0x10
+; CHECK-NEXT:    mov w6, #17 // =0x11
+; CHECK-NEXT:    mov w7, #18 // =0x12
+; CHECK-NEXT:    mov w8, #19 // =0x13
+; CHECK-NEXT:    mov w9, #20 // =0x14
+; CHECK-NEXT:    mov w10, #21 // =0x15
+; CHECK-NEXT:    bl callee_with_many_param
+; CHECK-NEXT:    ldp x20, x19, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #160 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; DARWIN-LABEL: caller3:
+; DARWIN:       ; %bb.0:
+; DARWIN-NEXT:    stp d15, d14, [sp, #-160]! ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x26, x25, [sp, #80] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x24, x23, [sp, #96] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x22, x21, [sp, #112] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x20, x19, [sp, #128] ; 16-byte Folded Spill
+; DARWIN-NEXT:    stp x29, x30, [sp, #144] ; 16-byte Folded Spill
+; DARWIN-NEXT:    .cfi_def_cfa_offset 160
+; DARWIN-NEXT:    .cfi_offset w30, -8
+; DARWIN-NEXT:    .cfi_offset w29, -16
+; DARWIN-NEXT:    .cfi_offset w19, -24
+; DARWIN-NEXT:    .cfi_offset w20, -32
+; DARWIN-NEXT:    .cfi_offset w21, -40
+; DARWIN-NEXT:    .cfi_offset w22, -48
+; DARWIN-NEXT:    .cfi_offset w23, -56
+; DARWIN-NEXT:    .cfi_offset w24, -64
+; DARWIN-NEXT:    .cfi_offset w25, -72
+; DARWIN-NEXT:    .cfi_offset w26, -80
+; DARWIN-NEXT:    .cfi_offset w27, -88
+; DARWIN-NEXT:    .cfi_offset w28, -96
+; DARWIN-NEXT:    .cfi_offset b8, -104
+; DARWIN-NEXT:    .cfi_offset b9, -112
+; DARWIN-NEXT:    .cfi_offset b10, -120
+; DARWIN-NEXT:    .cfi_offset b11, -128
+; DARWIN-NEXT:    .cfi_offset b12, -136
+; DARWIN-NEXT:    .cfi_offset b13, -144
+; DARWIN-NEXT:    .cfi_offset b14, -152
+; DARWIN-NEXT:    .cfi_offset b15, -160
+; DARWIN-NEXT:    mov w19, #1 ; =0x1
+; DARWIN-NEXT:    mov w20, #2 ; =0x2
+; DARWIN-NEXT:    mov w21, #3 ; =0x3
+; DARWIN-NEXT:    mov w22, #4 ; =0x4
+; DARWIN-NEXT:    mov w23, #5 ; =0x5
+; DARWIN-NEXT:    mov w24, #6 ; =0x6
+; DARWIN-NEXT:    mov w25, #7 ; =0x7
+; DARWIN-NEXT:    mov w26, #8 ; =0x8
+; DARWIN-NEXT:    mov w27, #9 ; =0x9
+; DARWIN-NEXT:    mov w28, #10 ; =0xa
+; DARWIN-NEXT:    mov w0, #11 ; =0xb
+; DARWIN-NEXT:    mov w1, #12 ; =0xc
+; DARWIN-NEXT:    mov w2, #13 ; =0xd
+; DARWIN-NEXT:    mov w3, #14 ; =0xe
+; DARWIN-NEXT:    mov w4, #15 ; =0xf
+; DARWIN-NEXT:    mov w5, #16 ; =0x10
+; DARWIN-NEXT:    mov w6, #17 ; =0x11
+; DARWIN-NEXT:    mov w7, #18 ; =0x12
+; DARWIN-NEXT:    mov w8, #19 ; =0x13
+; DARWIN-NEXT:    mov w9, #20 ; =0x14
+; DARWIN-NEXT:    mov w10, #21 ; =0x15
+; DARWIN-NEXT:    bl _callee_with_many_param
+; DARWIN-NEXT:    ldp x29, x30, [sp, #144] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x20, x19, [sp, #128] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x22, x21, [sp, #112] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x24, x23, [sp, #96] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x26, x25, [sp, #80] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; DARWIN-NEXT:    ldp d15, d14, [sp], #160 ; 16-byte Folded Reload
+; DARWIN-NEXT:    ret
+  %ret = call preserve_nonecc i64 @callee_with_many_param(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21)
+  ret i64 %ret
+}
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_musttail.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_musttail.ll
new file mode 100644
index 00000000000000..8da09a42121b02
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_musttail.ll
@@ -0,0 +1,11 @@
+; RUN: not llc -mtriple=aarch64-unknown-unknown %s -o - 2>&1 | FileCheck %s
+
+; An incompatible calling convention causes the following error message.
+
+; CHECK: cannot guarantee tail call due to mismatched calling conv
+
+declare preserve_nonecc void @callee(ptr)
+define void @caller(ptr %a) {
+  musttail call preserve_nonecc void @callee(ptr %a)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_swift.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_swift.ll
new file mode 100644
index 00000000000000..908fa712679a71
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_swift.ll
@@ -0,0 +1,16 @@
+; RUN: not llc -mtriple=aarch64 %s -o - 2>&1 | FileCheck %s
+
+; Swift attributes should not be used with preserve_none.
+
+declare preserve_nonecc void @foo(ptr swiftself)
+
+; CHECK: error: <unknown>:0:0: in function bar void (ptr): Swift attributes can't be used with preserve_none
+define preserve_nonecc void @bar(ptr swifterror) {
+  ret void
+}
+
+; CHECK: error: <unknown>:0:0: in function qux void (ptr): Swift attributes can't be used with preserve_none
+define void @qux(ptr %addr) {
+  call preserve_nonecc void @foo(ptr swiftself %addr)
+  ret void
+}

>From c20ea0a22cb4f44c07df50b2cc4cfc36873bf58d Mon Sep 17 00:00:00 2001
From: Antonio Abbatangelo <contact at antangelo.com>
Date: Sun, 5 May 2024 00:30:42 -0400
Subject: [PATCH 2/5] Exclude X16 and X17 from register assignment

---
 .../Target/AArch64/AArch64CallingConvention.td | 12 +++++++-----
 .../CodeGen/AArch64/preserve_nonecc_call.ll    | 18 +++++++++++++++---
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 9eee2b65c28fdc..7d24aae99356f6 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -496,23 +496,25 @@ def CC_AArch64_GHC : CallingConv<[
 
 let Entry = 1 in
 def CC_AArch64_Preserve_None : CallingConv<[
-    // We only preserve:
-    // - X18, which is used for the 'nest' parameter.
+    // We can pass arguments in all general registers, except:
+    // - X16/X17, used by the linker as IP0/IP1
+    // - X18, used for the 'nest' parameter
     // - X29, the frame pointer
     // - X30, the link register
-    // All other registers can be used to pass arguments.
+    // General registers are not preserved with the exception of
+    // FP, LR, and X18
     // Non-volatile registers are used first, so functions may call
     // normal functions without saving and reloading arguments.
     CCIfType<[i32], CCAssignToReg<[W19, W20, W21, W22, W23,
                                    W24, W25, W26, W27, W28,
                                    W0, W1, W2, W3, W4, W5,
                                    W6, W7, W8, W9, W10, W11,
-                                   W12, W13, W14, W15, W16, W17]>>,
+                                   W12, W13, W14, W15]>>,
     CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23,
                                    X24, X25, X26, X27, X28,
                                    X0, X1, X2, X3, X4, X5,
                                    X6, X7, X8, X9, X10, X11,
-                                   X12, X13, X14, X15, X16, X17]>>,
+                                   X12, X13, X14, X15]>>,
 
     CCDelegateTo<CC_AArch64_AAPCS>
 ]>;
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
index a8cb8c0947121f..bb0bc182396573 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
@@ -123,13 +123,15 @@ define preserve_nonecc void @caller2(ptr %a) {
 }
 
 ; Preserve_none function can use more registers to pass parameters.
-declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21)
-define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21) {
+declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24, i64 %a25)
+define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24, i64 %a25) {
 ; CHECK-LABEL: callee_with_many_param:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov x15, x14
+; CHECK-NEXT:    mov x14, x19
 ; CHECK-NEXT:    mov x19, x20
 ; CHECK-NEXT:    mov x20, x21
 ; CHECK-NEXT:    mov x21, x22
@@ -150,6 +152,10 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
 ; CHECK-NEXT:    mov x7, x8
 ; CHECK-NEXT:    mov x8, x9
 ; CHECK-NEXT:    mov x9, x10
+; CHECK-NEXT:    mov x10, x11
+; CHECK-NEXT:    mov x11, x12
+; CHECK-NEXT:    mov x12, x13
+; CHECK-NEXT:    mov x13, x15
 ; CHECK-NEXT:    bl callee_with_many_param2
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -160,6 +166,8 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
 ; DARWIN-NEXT:    .cfi_def_cfa_offset 16
 ; DARWIN-NEXT:    .cfi_offset w30, -8
 ; DARWIN-NEXT:    .cfi_offset w29, -16
+; DARWIN-NEXT:    mov x15, x14
+; DARWIN-NEXT:    mov x14, x19
 ; DARWIN-NEXT:    mov x19, x20
 ; DARWIN-NEXT:    mov x20, x21
 ; DARWIN-NEXT:    mov x21, x22
@@ -180,10 +188,14 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
 ; DARWIN-NEXT:    mov x7, x8
 ; DARWIN-NEXT:    mov x8, x9
 ; DARWIN-NEXT:    mov x9, x10
+; DARWIN-NEXT:    mov x10, x11
+; DARWIN-NEXT:    mov x11, x12
+; DARWIN-NEXT:    mov x12, x13
+; DARWIN-NEXT:    mov x13, x15
 ; DARWIN-NEXT:    bl _callee_with_many_param2
 ; DARWIN-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; DARWIN-NEXT:    ret
-  %ret = call preserve_nonecc i64 @callee_with_many_param2(i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21)
+  %ret = call preserve_nonecc i64 @callee_with_many_param2(i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24, i64 %a25, i64 %a1)
   ret i64 %ret
 }
 

>From f5538e5ff31f982fb9b1ff6164f339cb89c28038 Mon Sep 17 00:00:00 2001
From: Antonio Abbatangelo <contact at antangelo.com>
Date: Sun, 5 May 2024 12:06:46 -0400
Subject: [PATCH 3/5] Fix trailing whitespace

---
 clang/include/clang/Basic/AttrDocs.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index d23465b77e7edd..baa80e7ce51374 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -5662,7 +5662,7 @@ On X86-64 and AArch64 targets, this attribute changes the calling convention of
 The ``preserve_none`` calling convention tries to preserve as few general
 registers as possible. So all general registers are caller saved registers. It
 also uses more general registers to pass arguments. This attribute doesn't
-impact floating-point registers. 
+impact floating-point registers.
 
 - On X86-64, only RSP and RBP are preserved by the callee.
   Registers RDI, RSI, RDX, RCX, R8, R9, R11, R12, R13, R14, R15 and RAX now can

>From f6f1a884b932cff658954f6b77afb81de431f572 Mon Sep 17 00:00:00 2001
From: Antonio Abbatangelo <contact at antangelo.com>
Date: Sun, 5 May 2024 12:10:38 -0400
Subject: [PATCH 4/5] Remove X16 and X17 from arg passing list in documentation

---
 clang/include/clang/Basic/AttrDocs.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index baa80e7ce51374..122b4af1c45c42 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -5669,7 +5669,7 @@ impact floating-point registers.
   be used to pass function arguments. Floating-point registers (XMMs/YMMs) still
   follow the C calling convention.
 - On AArch64, only LR and FP are preserved by the callee.
-  Registers X19-X28 and X0-X17 are used to pass function arguments.
+  Registers X19-X28 and X0-X15 are used to pass function arguments.
   X18, SIMD and floating-point registers follow the AAPCS calling
   convention.
   }];

>From 6ce5644b689cf0d8dff0ad577ef19d34e526b072 Mon Sep 17 00:00:00 2001
From: Antonio Abbatangelo <contact at antangelo.com>
Date: Sun, 5 May 2024 12:12:48 -0400
Subject: [PATCH 5/5] Update mention of X16-X17 in the documentation

---
 clang/include/clang/Basic/AttrDocs.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 122b4af1c45c42..04db41e5ff9768 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -5670,7 +5670,7 @@ impact floating-point registers.
   follow the C calling convention.
 - On AArch64, only LR and FP are preserved by the callee.
   Registers X19-X28 and X0-X15 are used to pass function arguments.
-  X18, SIMD and floating-point registers follow the AAPCS calling
+  X16-X18, SIMD and floating-point registers follow the AAPCS calling
   convention.
   }];
 }



More information about the llvm-commits mailing list