[llvm] [AArch64] Fix return address auth in swiftasync epilogues (PR #189484)

Jon Roelofs via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 3 16:53:28 PDT 2026


https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/189484

>From 48c350c9b41f00ad644443ce0936dd56324ef3f4 Mon Sep 17 00:00:00 2001
From: Oskar Wirga <oskar.wirga at gmail.com>
Date: Thu, 26 Mar 2026 10:40:31 -0700
Subject: [PATCH 01/10] [AArch64] Fix return address auth in tail call
 epilogues

When a tail call has a non-zero FPDiff (the callee needs a different
amount of stack argument space), SP has already been adjusted by the
epilogue before the return address is authenticated. AUTI[AB]SP
authenticates LR using the current SP, which no longer matches the
entry SP used by PACI[AB]SP at function entry, causing
EXC_ARM_PAC_FAIL.

Fix by computing the entry SP into X16 and using explicit
AUTI[AB] x30, x16 instead of AUTI[AB]SP when FPDiff != 0.

Also moves getArgumentStackToRestore from private to public in
AArch64FrameLowering.h so that AArch64PointerAuth.cpp can access it
(existing callers use friend access, which doesn't work across
anonymous namespaces).
---
 .../lib/Target/AArch64/AArch64FrameLowering.h | 18 ++++-----
 .../lib/Target/AArch64/AArch64PointerAuth.cpp | 27 +++++++++++--
 .../CodeGen/AArch64/arm64e-tail-call-autib.ll | 38 +++++++++++++++++++
 3 files changed, 71 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 7ef2c4f388c7c..19cc93d01013d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -87,6 +87,15 @@ class AArch64FrameLowering : public TargetFrameLowering {
   /// Can this function use the red zone for local allocations.
   bool canUseRedZone(const MachineFunction &MF) const;
 
+  /// Returns how much of the incoming argument stack area (in bytes) we should
+  /// clean up in an epilogue. For the C calling convention this will be 0, for
+  /// guaranteed tail call conventions it can be positive (a normal return or a
+  /// tail call to a function that uses less stack space for arguments) or
+  /// negative (for a tail call to a function that needs more stack space than
+  /// us for arguments).
+  int64_t getArgumentStackToRestore(MachineFunction &MF,
+                                    MachineBasicBlock &MBB) const;
+
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
 
   bool
@@ -239,15 +248,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
                                         const AArch64InstrInfo &TII,
                                         MachineInstr::MIFlag Flag) const;
 
-  /// Returns how much of the incoming argument stack area (in bytes) we should
-  /// clean up in an epilogue. For the C calling convention this will be 0, for
-  /// guaranteed tail call conventions it can be positive (a normal return or a
-  /// tail call to a function that uses less stack space for arguments) or
-  /// negative (for a tail call to a function that needs more stack space than
-  /// us for arguments).
-  int64_t getArgumentStackToRestore(MachineFunction &MF,
-                                    MachineBasicBlock &MBB) const;
-
   // Find a scratch register that we can use at the start of the prologue to
   // re-align the stack pointer.  We avoid using callee-save registers since
   // they may appear to be free when this is called from canUseAsPrologue
diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index 517b8a4c1737b..a3f03f6068e93 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -9,6 +9,7 @@
 #include "AArch64PointerAuth.h"
 
 #include "AArch64.h"
+#include "AArch64FrameLowering.h"
 #include "AArch64InstrInfo.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64Subtarget.h"
@@ -196,9 +197,29 @@ void AArch64PointerAuth::authenticateLR(
             .setMIFlag(MachineInstr::FrameDestroy);
         emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       }
-      BuildMI(MBB, MBBI, DL,
-              TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
-          .setMIFlag(MachineInstr::FrameDestroy);
+      // When a tail call has a non-zero FPDiff (callee needs different stack
+      // arg space), the epilogue adjusts SP before reaching here. SP no
+      // longer equals the entry SP used by PACI[AB]SP. Compute the entry SP
+      // into X16 and use explicit AUTI[AB] instead of AUTI[AB]SP.
+      // entry_SP = SP - FPDiff (FPDiff is negative when callee needs more
+      // space, positive when less).
+      auto &AFL = *static_cast<const AArch64FrameLowering *>(
+          MF.getSubtarget().getFrameLowering());
+      int64_t FPDiff = AFL.getArgumentStackToRestore(MF, MBB);
+      if (FPDiff != 0) {
+        emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
+                        StackOffset::getFixed(-FPDiff), TII,
+                        MachineInstr::FrameDestroy);
+        unsigned AutOpc = UseBKey ? AArch64::AUTIB : AArch64::AUTIA;
+        BuildMI(MBB, MBBI, DL, TII->get(AutOpc), AArch64::LR)
+            .addUse(AArch64::LR)
+            .addUse(AArch64::X16)
+            .setMIFlag(MachineInstr::FrameDestroy);
+      } else {
+        BuildMI(MBB, MBBI, DL,
+                TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
+            .setMIFlag(MachineInstr::FrameDestroy);
+      }
       if (!MFnI->branchProtectionPAuthLR())
         emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
     }
diff --git a/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll b/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll
new file mode 100644
index 0000000000000..5b83c5bd98287
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple arm64e-apple-darwin -o - %s | FileCheck %s
+;
+; In Swift async functions using swifttailcc, a tail call
+; with stack arguments adjusts SP in the epilogue before return address
+; authentication. AUTIBSP uses the current (adjusted) SP, not the entry
+; SP from PACIBSP, causing EXC_ARM_PAC_FAIL on arm64e.
+;
+; Fix: When FPDiff != 0, compute the entry SP into x16 and use explicit
+; autib x30, x16 instead of autibsp.
+
+declare swifttailcc void @callee_async(ptr swiftasync %ctx, i64, i64, i64, i64, i64, i64, i64, i64, i64)
+
+; FPDiff != 0: callee has stack args that this function doesn't.
+; Must use explicit autib with computed entry SP, NOT autibsp.
+define swifttailcc void @test_async_tail_call(ptr swiftasync %ctx) #0 {
+; CHECK-LABEL: _test_async_tail_call:
+; CHECK:         pacibsp
+; CHECK-NOT:     autibsp
+; CHECK:         autib x30, x16
+; CHECK:         b _callee_async
+  musttail call swifttailcc void @callee_async(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
+  ret void
+}
+
+declare swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
+
+; FPDiff == 0: callee has same stack arg layout. autibsp is correct here.
+define swifttailcc void @test_no_fpdiff_tail_call(ptr swiftasync %ctx) #0 {
+; CHECK-LABEL: _test_no_fpdiff_tail_call:
+; CHECK:         pacibsp
+; CHECK:         autibsp
+; CHECK-NOT:     autib x30, x16
+; CHECK:         b _callee_no_stack_args
+  musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
+  ret void
+}
+
+attributes #0 = { nounwind "ptrauth-returns" "ptrauth-auth-traps" "sign-return-address"="all" "frame-pointer"="all" }

>From d6e1667ccbec23e7e54568279b1e347f0ff8130d Mon Sep 17 00:00:00 2001
From: Oskar Wirga <oskar.wirga at gmail.com>
Date: Mon, 30 Mar 2026 15:11:38 -0700
Subject: [PATCH 02/10] Fix PAuthLR path for FPDiff != 0 tail calls

Apply the same entry SP computation to the PAuthLR
(branchProtectionPAuthLR + hasPAuthLR) path. When FPDiff != 0,
autiasppc uses the wrong SP just like autiasp does. Fall back to
explicit autia/autib x30, x16 with computed entry SP.

Hoists the FPDiff computation above both branches to avoid
duplication.
---
 .../lib/Target/AArch64/AArch64PointerAuth.cpp | 36 ++++++++++++-------
 .../AArch64/pauth-lr-tail-call-fpdiff.ll      | 34 ++++++++++++++++++
 2 files changed, 57 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll

diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index a3f03f6068e93..b76cbee22bd96 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -183,13 +183,32 @@ void AArch64PointerAuth::authenticateLR(
     }
     MBB.erase(TI);
   } else {
+    // When a tail call has a non-zero FPDiff (callee needs different stack
+    // arg space), the epilogue adjusts SP before reaching here. SP no
+    // longer equals the entry SP used by PACI[AB]SP. Compute the entry SP
+    // into X16 and use explicit AUTI[AB] instead of AUTI[AB]SP.
+    auto &AFL = *static_cast<const AArch64FrameLowering *>(
+        MF.getSubtarget().getFrameLowering());
+    int64_t FPDiff = AFL.getArgumentStackToRestore(MF, MBB);
+
     if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
       assert(PACSym && "No PAC instruction to refer to");
       emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
-      BuildMI(MBB, MBBI, DL,
-              TII->get(UseBKey ? AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi))
-          .addSym(PACSym)
-          .setMIFlag(MachineInstr::FrameDestroy);
+      if (FPDiff != 0) {
+        emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
+                        StackOffset::getFixed(-FPDiff), TII,
+                        MachineInstr::FrameDestroy);
+        unsigned AutOpc = UseBKey ? AArch64::AUTIB : AArch64::AUTIA;
+        BuildMI(MBB, MBBI, DL, TII->get(AutOpc), AArch64::LR)
+            .addUse(AArch64::LR)
+            .addUse(AArch64::X16)
+            .setMIFlag(MachineInstr::FrameDestroy);
+      } else {
+        BuildMI(MBB, MBBI, DL,
+                TII->get(UseBKey ? AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi))
+            .addSym(PACSym)
+            .setMIFlag(MachineInstr::FrameDestroy);
+      }
     } else {
       if (MFnI->branchProtectionPAuthLR()) {
         emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
@@ -197,15 +216,6 @@ void AArch64PointerAuth::authenticateLR(
             .setMIFlag(MachineInstr::FrameDestroy);
         emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       }
-      // When a tail call has a non-zero FPDiff (callee needs different stack
-      // arg space), the epilogue adjusts SP before reaching here. SP no
-      // longer equals the entry SP used by PACI[AB]SP. Compute the entry SP
-      // into X16 and use explicit AUTI[AB] instead of AUTI[AB]SP.
-      // entry_SP = SP - FPDiff (FPDiff is negative when callee needs more
-      // space, positive when less).
-      auto &AFL = *static_cast<const AArch64FrameLowering *>(
-          MF.getSubtarget().getFrameLowering());
-      int64_t FPDiff = AFL.getArgumentStackToRestore(MF, MBB);
       if (FPDiff != 0) {
         emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
                         StackOffset::getFixed(-FPDiff), TII,
diff --git a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
new file mode 100644
index 0000000000000..19a59e1ba634a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+v9a,+pauth-lr -o - %s \
+; RUN:   | FileCheck %s
+;
+; When a tail call has FPDiff != 0 and PAuthLR is enabled, the SP-based
+; autiasppc instruction uses the wrong SP value (adjusted for the tail
+; call's stack args). The fix computes entry SP into x16 and uses
+; explicit autia instead.
+
+declare swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64, i64, i64, i64, i64, i64, i64, i64, i64)
+declare swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
+
+; FPDiff != 0 with PAuthLR: must use explicit autia, not autiasppc.
+define swifttailcc void @test_pauthlr_fpdiff(ptr swiftasync %ctx) #0 {
+; CHECK-LABEL: test_pauthlr_fpdiff:
+; CHECK:         paciasppc
+; CHECK-NOT:     autiasppc
+; CHECK:         autia x30, x16
+; CHECK:         b callee_stack_args
+  musttail call swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
+  ret void
+}
+
+; FPDiff == 0 with PAuthLR: autiasppc is correct.
+define swifttailcc void @test_pauthlr_no_fpdiff(ptr swiftasync %ctx) #0 {
+; CHECK-LABEL: test_pauthlr_no_fpdiff:
+; CHECK:         paciasppc
+; CHECK:         autiasppc
+; CHECK-NOT:     autia x30, x16
+; CHECK:         b callee_no_stack_args
+  musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
+  ret void
+}
+
+attributes #0 = { nounwind "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" }

>From 4c63035d1a969d7df36a9d86c782a15a6e6a3e81 Mon Sep 17 00:00:00 2001
From: Oskar Wirga <oskar.wirga at gmail.com>
Date: Mon, 30 Mar 2026 15:58:22 -0700
Subject: [PATCH 03/10] Use autia171615 for PAuthLR path when FPDiff != 0

paciasppc signs LR using both SP and the signing PC as modifiers, so
plain AUTIA (which takes only a single modifier, here the entry SP) is
insufficient. Use autia171615/autib171615, which take x17=pointer,
x16=entry_SP, x15=signing_PC.

Also guard the NOP-space PAuthLR path against applying the FPDiff
fix, since PACM requires x16 to hold the signing PC and overwriting
it with entry SP would break the modifier.
---
 .../lib/Target/AArch64/AArch64PointerAuth.cpp | 27 +++++++++++++++----
 .../AArch64/pauth-lr-tail-call-fpdiff.ll      | 10 +++----
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index b76cbee22bd96..b6f3800fe6b04 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -195,13 +195,30 @@ void AArch64PointerAuth::authenticateLR(
       assert(PACSym && "No PAC instruction to refer to");
       emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       if (FPDiff != 0) {
+        // Use AUTIA171615/AUTIB171615: x17=LR, x16=entry_SP, x15=signing_PC.
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+            .addReg(AArch64::XZR)
+            .addReg(AArch64::LR)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameDestroy);
         emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
                         StackOffset::getFixed(-FPDiff), TII,
                         MachineInstr::FrameDestroy);
-        unsigned AutOpc = UseBKey ? AArch64::AUTIB : AArch64::AUTIA;
-        BuildMI(MBB, MBBI, DL, TII->get(AutOpc), AArch64::LR)
-            .addUse(AArch64::LR)
-            .addUse(AArch64::X16)
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADRP), AArch64::X15)
+            .addSym(PACSym, AArch64II::MO_PAGE)
+            .setMIFlag(MachineInstr::FrameDestroy);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
+            .addReg(AArch64::X15)
+            .addSym(PACSym, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameDestroy);
+        unsigned AutOpc = UseBKey ? AArch64::AUTIB171615 : AArch64::AUTIA171615;
+        BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
+            .setMIFlag(MachineInstr::FrameDestroy);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::LR)
+            .addReg(AArch64::XZR)
+            .addReg(AArch64::X17)
+            .addImm(0)
             .setMIFlag(MachineInstr::FrameDestroy);
       } else {
         BuildMI(MBB, MBBI, DL,
@@ -216,7 +233,7 @@ void AArch64PointerAuth::authenticateLR(
             .setMIFlag(MachineInstr::FrameDestroy);
         emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       }
-      if (FPDiff != 0) {
+      if (FPDiff != 0 && !MFnI->branchProtectionPAuthLR()) {
         emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
                         StackOffset::getFixed(-FPDiff), TII,
                         MachineInstr::FrameDestroy);
diff --git a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
index 19a59e1ba634a..b93ff8aa54059 100644
--- a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
+++ b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
@@ -3,18 +3,18 @@
 ;
 ; When a tail call has FPDiff != 0 and PAuthLR is enabled, the SP-based
 ; autiasppc instruction uses the wrong SP value (adjusted for the tail
-; call's stack args). The fix computes entry SP into x16 and uses
-; explicit autia instead.
+; call's stack args). The fix uses autia171615 with entry SP in x16 and
+; the signing PC in x15.
 
 declare swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64, i64, i64, i64, i64, i64, i64, i64, i64)
 declare swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
 
-; FPDiff != 0 with PAuthLR: must use explicit autia, not autiasppc.
+; FPDiff != 0 with PAuthLR: must use autia171615, not autiasppc.
 define swifttailcc void @test_pauthlr_fpdiff(ptr swiftasync %ctx) #0 {
 ; CHECK-LABEL: test_pauthlr_fpdiff:
 ; CHECK:         paciasppc
 ; CHECK-NOT:     autiasppc
-; CHECK:         autia x30, x16
+; CHECK:         autia171615
 ; CHECK:         b callee_stack_args
   musttail call swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
   ret void
@@ -25,7 +25,7 @@ define swifttailcc void @test_pauthlr_no_fpdiff(ptr swiftasync %ctx) #0 {
 ; CHECK-LABEL: test_pauthlr_no_fpdiff:
 ; CHECK:         paciasppc
 ; CHECK:         autiasppc
-; CHECK-NOT:     autia x30, x16
+; CHECK-NOT:     autia171615
 ; CHECK:         b callee_no_stack_args
   musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
   ret void

>From 83e07a6894e7819c748f2a2deb09e224e83c9fef Mon Sep 17 00:00:00 2001
From: Oskar Wirga <oskar at wirga.com>
Date: Thu, 2 Apr 2026 13:20:05 -0400
Subject: [PATCH 04/10] Restructure authenticateLR for all PAuthLR x FPDiff
 cases

Rewrite the else branch of authenticateLR to cleanly handle all
combinations of {hasPAuthLR, NOP-space PAuthLR, plain pac-ret} x
{FPDiff==0, FPDiff!=0} as orthogonal dimensions.

Mode 1 (FEAT_PAuth_LR HW): Use AUTIA171615/AUTIB171615 when FPDiff!=0
  (x17=LR, x16=entry_SP, x15=PACSym), AUTIASPPCi/AUTIBSPPCi otherwise.

Mode 2 (NOP-space compat): PACM + AUTIASP/AUTIBSP always, but when
  FPDiff!=0 temporarily restore SP to its entry value around the
  authentication instruction.

Mode 3 (plain pac-ret): AUTIA/AUTIB x30, x16 when FPDiff!=0
  (x16=entry_SP), AUTIASP/AUTIBSP otherwise.

Rewrite tests with update_llc_test_checks.py auto-generated CHECK
lines, 3 RUN lines (COMPAT/V83A/PAUTHLR), and A-key + B-key variants.
---
 .../lib/Target/AArch64/AArch64PointerAuth.cpp |  47 ++-
 .../CodeGen/AArch64/arm64e-tail-call-autib.ll |  98 +++--
 .../AArch64/pauth-lr-tail-call-fpdiff.ll      | 385 +++++++++++++++++-
 3 files changed, 469 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index b6f3800fe6b04..16e56962f0bdf 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -183,19 +183,21 @@ void AArch64PointerAuth::authenticateLR(
     }
     MBB.erase(TI);
   } else {
-    // When a tail call has a non-zero FPDiff (callee needs different stack
-    // arg space), the epilogue adjusts SP before reaching here. SP no
-    // longer equals the entry SP used by PACI[AB]SP. Compute the entry SP
-    // into X16 and use explicit AUTI[AB] instead of AUTI[AB]SP.
+    // When FPDiff != 0 (tail call with different stack arg space), SP has
+    // been adjusted and no longer matches the entry SP used as the signing
+    // modifier. We must reconstruct entry SP for authentication.
+    // The three signing modes (hasPAuthLR, NOP-space PAuthLR, plain pac-ret)
+    // each need different handling — see inline comments below.
     auto &AFL = *static_cast<const AArch64FrameLowering *>(
         MF.getSubtarget().getFrameLowering());
     int64_t FPDiff = AFL.getArgumentStackToRestore(MF, MBB);
 
     if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
+      // FEAT_PAuth_LR: use AUTIASPPCi or AUTIA171615 for FPDiff != 0.
       assert(PACSym && "No PAC instruction to refer to");
       emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       if (FPDiff != 0) {
-        // Use AUTIA171615/AUTIB171615: x17=LR, x16=entry_SP, x15=signing_PC.
+        // x17=LR, x16=entry_SP, x15=PACSym. Result in x17 → LR.
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
             .addReg(AArch64::XZR)
             .addReg(AArch64::LR)
@@ -226,14 +228,32 @@ void AArch64PointerAuth::authenticateLR(
             .addSym(PACSym)
             .setMIFlag(MachineInstr::FrameDestroy);
       }
-    } else {
-      if (MFnI->branchProtectionPAuthLR()) {
-        emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
-            .setMIFlag(MachineInstr::FrameDestroy);
-        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
+    } else if (MFnI->branchProtectionPAuthLR()) {
+      // NOP-space PAuthLR compat: PACM + AUTIASP. AUTIASP reads SP
+      // implicitly, so when FPDiff != 0 we restore SP to entry value
+      // around the auth instruction.
+      assert(PACSym && "No PAC instruction to refer to");
+      emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
+          .setMIFlag(MachineInstr::FrameDestroy);
+      emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
+      if (FPDiff != 0) {
+        emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+                        StackOffset::getFixed(-FPDiff), TII,
+                        MachineInstr::FrameDestroy);
       }
-      if (FPDiff != 0 && !MFnI->branchProtectionPAuthLR()) {
+      BuildMI(MBB, MBBI, DL,
+              TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
+          .setMIFlag(MachineInstr::FrameDestroy);
+      if (FPDiff != 0) {
+        emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+                        StackOffset::getFixed(FPDiff), TII,
+                        MachineInstr::FrameDestroy);
+      }
+    } else {
+      // Plain pac-ret: compute entry SP into x16 when FPDiff != 0.
+      if (FPDiff != 0) {
+        // entry_SP = current_SP - FPDiff.
         emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
                         StackOffset::getFixed(-FPDiff), TII,
                         MachineInstr::FrameDestroy);
@@ -247,8 +267,7 @@ void AArch64PointerAuth::authenticateLR(
                 TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
             .setMIFlag(MachineInstr::FrameDestroy);
       }
-      if (!MFnI->branchProtectionPAuthLR())
-        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
+      emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
     }
 
     if (NeedsWinCFI) {
diff --git a/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll b/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll
index 5b83c5bd98287..b880717482a42 100644
--- a/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll
+++ b/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll
@@ -1,38 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple arm64e-apple-darwin -o - %s | FileCheck %s
 ;
-; In Swift async functions using swifttailcc, a tail call
-; with stack arguments adjusts SP in the epilogue before return address
-; authentication. AUTIBSP uses the current (adjusted) SP, not the entry
-; SP from PACIBSP, causing EXC_ARM_PAC_FAIL on arm64e.
-;
-; Fix: When FPDiff != 0, compute the entry SP into x16 and use explicit
-; autib x30, x16 instead of autibsp.
+; arm64e uses B-key signing via "ptrauth-returns". Test that tail calls
+; with FPDiff != 0 use explicit autib x30, x16 (entry SP) instead of
+; autibsp (wrong SP), and that FPDiff == 0 still uses autibsp.
 
 declare swifttailcc void @callee_async(ptr swiftasync %ctx, i64, i64, i64, i64, i64, i64, i64, i64, i64)
+declare swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
 
-; FPDiff != 0: callee has stack args that this function doesn't.
-; Must use explicit autib with computed entry SP, NOT autibsp.
-define swifttailcc void @test_async_tail_call(ptr swiftasync %ctx) #0 {
-; CHECK-LABEL: _test_async_tail_call:
-; CHECK:         pacibsp
-; CHECK-NOT:     autibsp
-; CHECK:         autib x30, x16
-; CHECK:         b _callee_async
+; FPDiff != 0: Must use explicit autib with computed entry SP.
+define swifttailcc void @test_async_tail_call(ptr swiftasync %ctx) "ptrauth-returns" "ptrauth-auth-traps" "sign-return-address"="all" "frame-pointer"="all" {
+; CHECK-LABEL: test_async_tail_call:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    .cfi_b_key_frame
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    .cfi_negate_ra_state
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x16, sp, #8
+; CHECK-NEXT:    movk x16, #49946, lsl #48
+; CHECK-NEXT:    mov x17, x22
+; CHECK-NEXT:    pacdb x17, x16
+; CHECK-NEXT:    str x17, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 32
+; CHECK-NEXT:    .cfi_offset w30, -24
+; CHECK-NEXT:    .cfi_offset w29, -32
+; CHECK-NEXT:    mov w8, #9 ; =0x9
+; CHECK-NEXT:    str x8, [x29, #16]
+; CHECK-NEXT:    mov w0, #1 ; =0x1
+; CHECK-NEXT:    mov w1, #2 ; =0x2
+; CHECK-NEXT:    mov w2, #3 ; =0x3
+; CHECK-NEXT:    mov w3, #4 ; =0x4
+; CHECK-NEXT:    mov w4, #5 ; =0x5
+; CHECK-NEXT:    mov w5, #6 ; =0x6
+; CHECK-NEXT:    mov w6, #7 ; =0x7
+; CHECK-NEXT:    mov w7, #8 ; =0x8
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    add x16, sp, #16
+; CHECK-NEXT:    autib x30, x16
+; CHECK-NEXT:    eor x16, x30, x30, lsl #1
+; CHECK-NEXT:    tbz x16, #62, Lauth_success_0
+; CHECK-NEXT:    brk #0xc471
+; CHECK-NEXT:  Lauth_success_0:
+; CHECK-NEXT:    b _callee_async
   musttail call swifttailcc void @callee_async(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
   ret void
 }
 
-declare swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
-
-; FPDiff == 0: callee has same stack arg layout. autibsp is correct here.
-define swifttailcc void @test_no_fpdiff_tail_call(ptr swiftasync %ctx) #0 {
-; CHECK-LABEL: _test_no_fpdiff_tail_call:
-; CHECK:         pacibsp
-; CHECK:         autibsp
-; CHECK-NOT:     autib x30, x16
-; CHECK:         b _callee_no_stack_args
+; FPDiff == 0: autibsp is correct here.
+define swifttailcc void @test_no_fpdiff_tail_call(ptr swiftasync %ctx) "ptrauth-returns" "ptrauth-auth-traps" "sign-return-address"="all" "frame-pointer"="all" {
+; CHECK-LABEL: test_no_fpdiff_tail_call:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    .cfi_b_key_frame
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    .cfi_negate_ra_state
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x16, sp, #8
+; CHECK-NEXT:    movk x16, #49946, lsl #48
+; CHECK-NEXT:    mov x17, x22
+; CHECK-NEXT:    pacdb x17, x16
+; CHECK-NEXT:    str x17, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    autibsp
+; CHECK-NEXT:    eor x16, x30, x30, lsl #1
+; CHECK-NEXT:    tbz x16, #62, Lauth_success_1
+; CHECK-NEXT:    brk #0xc471
+; CHECK-NEXT:  Lauth_success_1:
+; CHECK-NEXT:    b _callee_no_stack_args
   musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
   ret void
 }
-
-attributes #0 = { nounwind "ptrauth-returns" "ptrauth-auth-traps" "sign-return-address"="all" "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
index b93ff8aa54059..1b0904b9cda14 100644
--- a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
+++ b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
@@ -1,34 +1,377 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+v9a,+pauth-lr -o - %s \
-; RUN:   | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64              < %s | FileCheck --check-prefixes=CHECK,COMPAT %s
+; RUN: llc -mtriple=aarch64 -mattr=v8.3a < %s | FileCheck --check-prefixes=CHECK,V83A %s
+; RUN: llc -mtriple=aarch64 -mattr=v9a -mattr=pauth-lr < %s | FileCheck --check-prefixes=PAUTHLR %s
 ;
-; When a tail call has FPDiff != 0 and PAuthLR is enabled, the SP-based
-; autiasppc instruction uses the wrong SP value (adjusted for the tail
-; call's stack args). The fix uses autia171615 with entry SP in x16 and
-; the signing PC in x15.
+; Test tail calls with return address signing across all three modes
+; (COMPAT, V83A, PAUTHLR) and both FPDiff==0 and FPDiff!=0 cases,
+; with A-key and B-key variants.
 
 declare swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64, i64, i64, i64, i64, i64, i64, i64, i64)
 declare swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
 
-; FPDiff != 0 with PAuthLR: must use autia171615, not autiasppc.
-define swifttailcc void @test_pauthlr_fpdiff(ptr swiftasync %ctx) #0 {
-; CHECK-LABEL: test_pauthlr_fpdiff:
-; CHECK:         paciasppc
-; CHECK-NOT:     autiasppc
-; CHECK:         autia171615
-; CHECK:         b callee_stack_args
+; FPDiff != 0, A-key: callee has stack args that this function doesn't.
+define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" {
+; COMPAT-LABEL: tail_call_fpdiff_a_key:
+; COMPAT:       // %bb.0:
+; COMPAT-NEXT:    hint #39
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:  .Ltmp0:
+; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
+; COMPAT-NEXT:    sub sp, sp, #48
+; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; COMPAT-NEXT:    str x22, [sp, #8]
+; COMPAT-NEXT:    add x29, sp, #16
+; COMPAT-NEXT:    .cfi_def_cfa w29, 32
+; COMPAT-NEXT:    .cfi_offset w30, -24
+; COMPAT-NEXT:    .cfi_offset w29, -32
+; COMPAT-NEXT:    mov w8, #9 // =0x9
+; COMPAT-NEXT:    mov w0, #1 // =0x1
+; COMPAT-NEXT:    mov w1, #2 // =0x2
+; COMPAT-NEXT:    str x8, [x29, #16]
+; COMPAT-NEXT:    mov w2, #3 // =0x3
+; COMPAT-NEXT:    mov w3, #4 // =0x4
+; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; COMPAT-NEXT:    mov w4, #5 // =0x5
+; COMPAT-NEXT:    mov w5, #6 // =0x6
+; COMPAT-NEXT:    mov w6, #7 // =0x7
+; COMPAT-NEXT:    mov w7, #8 // =0x8
+; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
+; COMPAT-NEXT:    add sp, sp, #32
+; COMPAT-NEXT:    adrp x16, .Ltmp0
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp0
+; COMPAT-NEXT:    hint #39
+; COMPAT-NEXT:    add sp, sp, #16
+; COMPAT-NEXT:    hint #29
+; COMPAT-NEXT:    sub sp, sp, #16
+; COMPAT-NEXT:    b callee_stack_args
+;
+; V83A-LABEL: tail_call_fpdiff_a_key:
+; V83A:       // %bb.0:
+; V83A-NEXT:    hint #39
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:  .Ltmp0:
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    orr x29, x29, #0x1000000000000000
+; V83A-NEXT:    sub sp, sp, #48
+; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; V83A-NEXT:    str x22, [sp, #8]
+; V83A-NEXT:    add x29, sp, #16
+; V83A-NEXT:    .cfi_def_cfa w29, 32
+; V83A-NEXT:    .cfi_offset w30, -24
+; V83A-NEXT:    .cfi_offset w29, -32
+; V83A-NEXT:    mov w8, #9 // =0x9
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    str x8, [x29, #16]
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; V83A-NEXT:    mov w4, #5 // =0x5
+; V83A-NEXT:    mov w5, #6 // =0x6
+; V83A-NEXT:    mov w6, #7 // =0x7
+; V83A-NEXT:    mov w7, #8 // =0x8
+; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    adrp x16, .Ltmp0
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp0
+; V83A-NEXT:    hint #39
+; V83A-NEXT:    add sp, sp, #16
+; V83A-NEXT:    autiasp
+; V83A-NEXT:    sub sp, sp, #16
+; V83A-NEXT:    b callee_stack_args
+;
+; PAUTHLR-LABEL: tail_call_fpdiff_a_key:
+; PAUTHLR:       // %bb.0:
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:  .Ltmp0:
+; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
+; PAUTHLR-NEXT:    sub sp, sp, #48
+; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; PAUTHLR-NEXT:    str x22, [sp, #8]
+; PAUTHLR-NEXT:    add x29, sp, #16
+; PAUTHLR-NEXT:    .cfi_def_cfa w29, 32
+; PAUTHLR-NEXT:    .cfi_offset w30, -24
+; PAUTHLR-NEXT:    .cfi_offset w29, -32
+; PAUTHLR-NEXT:    mov w8, #9 // =0x9
+; PAUTHLR-NEXT:    mov w0, #1 // =0x1
+; PAUTHLR-NEXT:    mov w1, #2 // =0x2
+; PAUTHLR-NEXT:    str x8, [x29, #16]
+; PAUTHLR-NEXT:    mov w2, #3 // =0x3
+; PAUTHLR-NEXT:    mov w3, #4 // =0x4
+; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; PAUTHLR-NEXT:    mov w4, #5 // =0x5
+; PAUTHLR-NEXT:    mov w5, #6 // =0x6
+; PAUTHLR-NEXT:    mov w6, #7 // =0x7
+; PAUTHLR-NEXT:    mov w7, #8 // =0x8
+; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
+; PAUTHLR-NEXT:    add sp, sp, #32
+; PAUTHLR-NEXT:    mov x17, x30
+; PAUTHLR-NEXT:    add x16, sp, #16
+; PAUTHLR-NEXT:    adrp x15, .Ltmp0
+; PAUTHLR-NEXT:    add x15, x15, :lo12:.Ltmp0
+; PAUTHLR-NEXT:    autia171615
+; PAUTHLR-NEXT:    mov x30, x17
+; PAUTHLR-NEXT:    b callee_stack_args
+  musttail call swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
+  ret void
+}
+
+; FPDiff != 0, B-key: callee has stack args that this function doesn't.
+define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "sign-return-address-key"="b_key" "frame-pointer"="all" {
+; COMPAT-LABEL: tail_call_fpdiff_b_key:
+; COMPAT:       // %bb.0:
+; COMPAT-NEXT:    .cfi_b_key_frame
+; COMPAT-NEXT:    hint #39
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:  .Ltmp1:
+; COMPAT-NEXT:    hint #27
+; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
+; COMPAT-NEXT:    sub sp, sp, #48
+; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; COMPAT-NEXT:    str x22, [sp, #8]
+; COMPAT-NEXT:    add x29, sp, #16
+; COMPAT-NEXT:    .cfi_def_cfa w29, 32
+; COMPAT-NEXT:    .cfi_offset w30, -24
+; COMPAT-NEXT:    .cfi_offset w29, -32
+; COMPAT-NEXT:    mov w8, #9 // =0x9
+; COMPAT-NEXT:    mov w0, #1 // =0x1
+; COMPAT-NEXT:    mov w1, #2 // =0x2
+; COMPAT-NEXT:    str x8, [x29, #16]
+; COMPAT-NEXT:    mov w2, #3 // =0x3
+; COMPAT-NEXT:    mov w3, #4 // =0x4
+; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; COMPAT-NEXT:    mov w4, #5 // =0x5
+; COMPAT-NEXT:    mov w5, #6 // =0x6
+; COMPAT-NEXT:    mov w6, #7 // =0x7
+; COMPAT-NEXT:    mov w7, #8 // =0x8
+; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
+; COMPAT-NEXT:    add sp, sp, #32
+; COMPAT-NEXT:    adrp x16, .Ltmp1
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp1
+; COMPAT-NEXT:    hint #39
+; COMPAT-NEXT:    add sp, sp, #16
+; COMPAT-NEXT:    hint #31
+; COMPAT-NEXT:    sub sp, sp, #16
+; COMPAT-NEXT:    b callee_stack_args
+;
+; V83A-LABEL: tail_call_fpdiff_b_key:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    hint #39
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:  .Ltmp1:
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    orr x29, x29, #0x1000000000000000
+; V83A-NEXT:    sub sp, sp, #48
+; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; V83A-NEXT:    str x22, [sp, #8]
+; V83A-NEXT:    add x29, sp, #16
+; V83A-NEXT:    .cfi_def_cfa w29, 32
+; V83A-NEXT:    .cfi_offset w30, -24
+; V83A-NEXT:    .cfi_offset w29, -32
+; V83A-NEXT:    mov w8, #9 // =0x9
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    str x8, [x29, #16]
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; V83A-NEXT:    mov w4, #5 // =0x5
+; V83A-NEXT:    mov w5, #6 // =0x6
+; V83A-NEXT:    mov w6, #7 // =0x7
+; V83A-NEXT:    mov w7, #8 // =0x8
+; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    adrp x16, .Ltmp1
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp1
+; V83A-NEXT:    hint #39
+; V83A-NEXT:    add sp, sp, #16
+; V83A-NEXT:    autibsp
+; V83A-NEXT:    sub sp, sp, #16
+; V83A-NEXT:    b callee_stack_args
+;
+; PAUTHLR-LABEL: tail_call_fpdiff_b_key:
+; PAUTHLR:       // %bb.0:
+; PAUTHLR-NEXT:    .cfi_b_key_frame
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:  .Ltmp1:
+; PAUTHLR-NEXT:    pacibsppc
+; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
+; PAUTHLR-NEXT:    sub sp, sp, #48
+; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; PAUTHLR-NEXT:    str x22, [sp, #8]
+; PAUTHLR-NEXT:    add x29, sp, #16
+; PAUTHLR-NEXT:    .cfi_def_cfa w29, 32
+; PAUTHLR-NEXT:    .cfi_offset w30, -24
+; PAUTHLR-NEXT:    .cfi_offset w29, -32
+; PAUTHLR-NEXT:    mov w8, #9 // =0x9
+; PAUTHLR-NEXT:    mov w0, #1 // =0x1
+; PAUTHLR-NEXT:    mov w1, #2 // =0x2
+; PAUTHLR-NEXT:    str x8, [x29, #16]
+; PAUTHLR-NEXT:    mov w2, #3 // =0x3
+; PAUTHLR-NEXT:    mov w3, #4 // =0x4
+; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; PAUTHLR-NEXT:    mov w4, #5 // =0x5
+; PAUTHLR-NEXT:    mov w5, #6 // =0x6
+; PAUTHLR-NEXT:    mov w6, #7 // =0x7
+; PAUTHLR-NEXT:    mov w7, #8 // =0x8
+; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
+; PAUTHLR-NEXT:    add sp, sp, #32
+; PAUTHLR-NEXT:    mov x17, x30
+; PAUTHLR-NEXT:    add x16, sp, #16
+; PAUTHLR-NEXT:    adrp x15, .Ltmp1
+; PAUTHLR-NEXT:    add x15, x15, :lo12:.Ltmp1
+; PAUTHLR-NEXT:    autib171615
+; PAUTHLR-NEXT:    mov x30, x17
+; PAUTHLR-NEXT:    b callee_stack_args
   musttail call swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
   ret void
 }
 
-; FPDiff == 0 with PAuthLR: autiasppc is correct.
-define swifttailcc void @test_pauthlr_no_fpdiff(ptr swiftasync %ctx) #0 {
-; CHECK-LABEL: test_pauthlr_no_fpdiff:
-; CHECK:         paciasppc
-; CHECK:         autiasppc
-; CHECK-NOT:     autia171615
-; CHECK:         b callee_no_stack_args
+; FPDiff == 0, A-key: callee has same calling convention, no extra stack args.
+define swifttailcc void @tail_call_no_fpdiff_a_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" {
+; COMPAT-LABEL: tail_call_no_fpdiff_a_key:
+; COMPAT:       // %bb.0:
+; COMPAT-NEXT:    hint #39
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:  .Ltmp2:
+; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
+; COMPAT-NEXT:    sub sp, sp, #32
+; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; COMPAT-NEXT:    str x22, [sp, #8]
+; COMPAT-NEXT:    add x29, sp, #16
+; COMPAT-NEXT:    .cfi_def_cfa w29, 16
+; COMPAT-NEXT:    .cfi_offset w30, -8
+; COMPAT-NEXT:    .cfi_offset w29, -16
+; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
+; COMPAT-NEXT:    add sp, sp, #32
+; COMPAT-NEXT:    adrp x16, .Ltmp2
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp2
+; COMPAT-NEXT:    hint #39
+; COMPAT-NEXT:    hint #29
+; COMPAT-NEXT:    b callee_no_stack_args
+;
+; V83A-LABEL: tail_call_no_fpdiff_a_key:
+; V83A:       // %bb.0:
+; V83A-NEXT:    hint #39
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:  .Ltmp2:
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    orr x29, x29, #0x1000000000000000
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; V83A-NEXT:    str x22, [sp, #8]
+; V83A-NEXT:    add x29, sp, #16
+; V83A-NEXT:    .cfi_def_cfa w29, 16
+; V83A-NEXT:    .cfi_offset w30, -8
+; V83A-NEXT:    .cfi_offset w29, -16
+; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    adrp x16, .Ltmp2
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp2
+; V83A-NEXT:    hint #39
+; V83A-NEXT:    autiasp
+; V83A-NEXT:    b callee_no_stack_args
+;
+; PAUTHLR-LABEL: tail_call_no_fpdiff_a_key:
+; PAUTHLR:       // %bb.0:
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:  .Ltmp2:
+; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
+; PAUTHLR-NEXT:    sub sp, sp, #32
+; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; PAUTHLR-NEXT:    str x22, [sp, #8]
+; PAUTHLR-NEXT:    add x29, sp, #16
+; PAUTHLR-NEXT:    .cfi_def_cfa w29, 16
+; PAUTHLR-NEXT:    .cfi_offset w30, -8
+; PAUTHLR-NEXT:    .cfi_offset w29, -16
+; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
+; PAUTHLR-NEXT:    add sp, sp, #32
+; PAUTHLR-NEXT:    autiasppc .Ltmp2
+; PAUTHLR-NEXT:    b callee_no_stack_args
   musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
   ret void
 }
 
-attributes #0 = { nounwind "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" }
+; FPDiff == 0, B-key: callee has same calling convention, no extra stack args.
+define swifttailcc void @tail_call_no_fpdiff_b_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "sign-return-address-key"="b_key" "frame-pointer"="all" {
+; COMPAT-LABEL: tail_call_no_fpdiff_b_key:
+; COMPAT:       // %bb.0:
+; COMPAT-NEXT:    .cfi_b_key_frame
+; COMPAT-NEXT:    hint #39
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:  .Ltmp3:
+; COMPAT-NEXT:    hint #27
+; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
+; COMPAT-NEXT:    sub sp, sp, #32
+; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; COMPAT-NEXT:    str x22, [sp, #8]
+; COMPAT-NEXT:    add x29, sp, #16
+; COMPAT-NEXT:    .cfi_def_cfa w29, 16
+; COMPAT-NEXT:    .cfi_offset w30, -8
+; COMPAT-NEXT:    .cfi_offset w29, -16
+; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
+; COMPAT-NEXT:    add sp, sp, #32
+; COMPAT-NEXT:    adrp x16, .Ltmp3
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp3
+; COMPAT-NEXT:    hint #39
+; COMPAT-NEXT:    hint #31
+; COMPAT-NEXT:    b callee_no_stack_args
+;
+; V83A-LABEL: tail_call_no_fpdiff_b_key:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    hint #39
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:  .Ltmp3:
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    orr x29, x29, #0x1000000000000000
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; V83A-NEXT:    str x22, [sp, #8]
+; V83A-NEXT:    add x29, sp, #16
+; V83A-NEXT:    .cfi_def_cfa w29, 16
+; V83A-NEXT:    .cfi_offset w30, -8
+; V83A-NEXT:    .cfi_offset w29, -16
+; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    adrp x16, .Ltmp3
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp3
+; V83A-NEXT:    hint #39
+; V83A-NEXT:    autibsp
+; V83A-NEXT:    b callee_no_stack_args
+;
+; PAUTHLR-LABEL: tail_call_no_fpdiff_b_key:
+; PAUTHLR:       // %bb.0:
+; PAUTHLR-NEXT:    .cfi_b_key_frame
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:  .Ltmp3:
+; PAUTHLR-NEXT:    pacibsppc
+; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
+; PAUTHLR-NEXT:    sub sp, sp, #32
+; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; PAUTHLR-NEXT:    str x22, [sp, #8]
+; PAUTHLR-NEXT:    add x29, sp, #16
+; PAUTHLR-NEXT:    .cfi_def_cfa w29, 16
+; PAUTHLR-NEXT:    .cfi_offset w30, -8
+; PAUTHLR-NEXT:    .cfi_offset w29, -16
+; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
+; PAUTHLR-NEXT:    add sp, sp, #32
+; PAUTHLR-NEXT:    autibsppc .Ltmp3
+; PAUTHLR-NEXT:    b callee_no_stack_args
+  musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

>From 1d7c7de8ef2d99f59dcc289dba4a7fae3dccd08a Mon Sep 17 00:00:00 2001
From: Oskar Wirga <oskar at wirga.com>
Date: Thu, 2 Apr 2026 18:24:18 -0400
Subject: [PATCH 05/10] Use PACM + AUTIA1716 for NOP-space PAuthLR when FPDiff
 != 0

Per reviewer feedback: PACM makes AUTIA1716 behave as AUTIA171615
on PAuthLR hardware. On older hardware, PACM is a NOP and x15 is
ignored. This avoids temporarily adjusting SP.

Register setup: x17=LR, x16=entry_SP, x15=PACSym for all PAuthLR
FPDiff != 0 cases. PACSym (x15) is not needed when
!MFnI->branchProtectionPAuthLR().
---
 .../lib/Target/AArch64/AArch64PointerAuth.cpp | 70 +++++++++++++------
 .../CodeGen/AArch64/arm64e-tail-call-autib.ll |  4 +-
 .../AArch64/pauth-lr-tail-call-fpdiff.ll      | 44 ++++++------
 3 files changed, 76 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index 16e56962f0bdf..f0ad4daca5da3 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -229,38 +229,66 @@ void AArch64PointerAuth::authenticateLR(
             .setMIFlag(MachineInstr::FrameDestroy);
       }
     } else if (MFnI->branchProtectionPAuthLR()) {
-      // NOP-space PAuthLR compat: PACM + AUTIASP. AUTIASP reads SP
-      // implicitly, so when FPDiff != 0 we restore SP to entry value
-      // around the auth instruction.
+      // NOP-space PAuthLR compat. PACM makes AUTIA1716 behave as
+      // AUTIA171615 on PAuthLR hardware; on older hardware PACM is a
+      // NOP and x15 is ignored.
       assert(PACSym && "No PAC instruction to refer to");
-      emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
-          .setMIFlag(MachineInstr::FrameDestroy);
       emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       if (FPDiff != 0) {
-        emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+        // x17=LR, x16=entry_SP, x15=PACSym. PACM + AUTIA1716.
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+            .addReg(AArch64::XZR)
+            .addReg(AArch64::LR)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameDestroy);
+        emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
                         StackOffset::getFixed(-FPDiff), TII,
                         MachineInstr::FrameDestroy);
-      }
-      BuildMI(MBB, MBBI, DL,
-              TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
-          .setMIFlag(MachineInstr::FrameDestroy);
-      if (FPDiff != 0) {
-        emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
-                        StackOffset::getFixed(FPDiff), TII,
-                        MachineInstr::FrameDestroy);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADRP), AArch64::X15)
+            .addSym(PACSym, AArch64II::MO_PAGE)
+            .setMIFlag(MachineInstr::FrameDestroy);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
+            .addReg(AArch64::X15)
+            .addSym(PACSym, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameDestroy);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
+            .setMIFlag(MachineInstr::FrameDestroy);
+        unsigned AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
+        BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
+            .setMIFlag(MachineInstr::FrameDestroy);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::LR)
+            .addReg(AArch64::XZR)
+            .addReg(AArch64::X17)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameDestroy);
+      } else {
+        emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
+            .setMIFlag(MachineInstr::FrameDestroy);
+        BuildMI(MBB, MBBI, DL,
+                TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
+            .setMIFlag(MachineInstr::FrameDestroy);
       }
     } else {
-      // Plain pac-ret: compute entry SP into x16 when FPDiff != 0.
+      // Plain pac-ret: use AUTIA1716 when FPDiff != 0.
       if (FPDiff != 0) {
-        // entry_SP = current_SP - FPDiff.
+        // x17=LR, x16=entry_SP. No PACSym needed.
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+            .addReg(AArch64::XZR)
+            .addReg(AArch64::LR)
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameDestroy);
         emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
                         StackOffset::getFixed(-FPDiff), TII,
                         MachineInstr::FrameDestroy);
-        unsigned AutOpc = UseBKey ? AArch64::AUTIB : AArch64::AUTIA;
-        BuildMI(MBB, MBBI, DL, TII->get(AutOpc), AArch64::LR)
-            .addUse(AArch64::LR)
-            .addUse(AArch64::X16)
+        unsigned AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
+        BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
+            .setMIFlag(MachineInstr::FrameDestroy);
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::LR)
+            .addReg(AArch64::XZR)
+            .addReg(AArch64::X17)
+            .addImm(0)
             .setMIFlag(MachineInstr::FrameDestroy);
       } else {
         BuildMI(MBB, MBBI, DL,
diff --git a/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll b/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll
index b880717482a42..2d5ddbcfc83a6 100644
--- a/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll
+++ b/llvm/test/CodeGen/AArch64/arm64e-tail-call-autib.ll
@@ -40,8 +40,10 @@ define swifttailcc void @test_async_tail_call(ptr swiftasync %ctx) "ptrauth-retu
 ; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
 ; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    mov x17, x30
 ; CHECK-NEXT:    add x16, sp, #16
-; CHECK-NEXT:    autib x30, x16
+; CHECK-NEXT:    autib1716
+; CHECK-NEXT:    mov x30, x17
 ; CHECK-NEXT:    eor x16, x30, x30, lsl #1
 ; CHECK-NEXT:    tbz x16, #62, Lauth_success_0
 ; CHECK-NEXT:    brk #0xc471
diff --git a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
index 1b0904b9cda14..41d578e4b9a2e 100644
--- a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
+++ b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
@@ -39,12 +39,13 @@ define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-pro
 ; COMPAT-NEXT:    mov w7, #8 // =0x8
 ; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
 ; COMPAT-NEXT:    add sp, sp, #32
-; COMPAT-NEXT:    adrp x16, .Ltmp0
-; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp0
+; COMPAT-NEXT:    mov x17, x30
+; COMPAT-NEXT:    add x16, sp, #16
+; COMPAT-NEXT:    adrp x15, .Ltmp0
+; COMPAT-NEXT:    add x15, x15, :lo12:.Ltmp0
 ; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    add sp, sp, #16
-; COMPAT-NEXT:    hint #29
-; COMPAT-NEXT:    sub sp, sp, #16
+; COMPAT-NEXT:    hint #12
+; COMPAT-NEXT:    mov x30, x17
 ; COMPAT-NEXT:    b callee_stack_args
 ;
 ; V83A-LABEL: tail_call_fpdiff_a_key:
@@ -74,12 +75,13 @@ define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-pro
 ; V83A-NEXT:    mov w7, #8 // =0x8
 ; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
 ; V83A-NEXT:    add sp, sp, #32
-; V83A-NEXT:    adrp x16, .Ltmp0
-; V83A-NEXT:    add x16, x16, :lo12:.Ltmp0
+; V83A-NEXT:    mov x17, x30
+; V83A-NEXT:    add x16, sp, #16
+; V83A-NEXT:    adrp x15, .Ltmp0
+; V83A-NEXT:    add x15, x15, :lo12:.Ltmp0
 ; V83A-NEXT:    hint #39
-; V83A-NEXT:    add sp, sp, #16
-; V83A-NEXT:    autiasp
-; V83A-NEXT:    sub sp, sp, #16
+; V83A-NEXT:    autia1716
+; V83A-NEXT:    mov x30, x17
 ; V83A-NEXT:    b callee_stack_args
 ;
 ; PAUTHLR-LABEL: tail_call_fpdiff_a_key:
@@ -149,12 +151,13 @@ define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-pro
 ; COMPAT-NEXT:    mov w7, #8 // =0x8
 ; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
 ; COMPAT-NEXT:    add sp, sp, #32
-; COMPAT-NEXT:    adrp x16, .Ltmp1
-; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp1
+; COMPAT-NEXT:    mov x17, x30
+; COMPAT-NEXT:    add x16, sp, #16
+; COMPAT-NEXT:    adrp x15, .Ltmp1
+; COMPAT-NEXT:    add x15, x15, :lo12:.Ltmp1
 ; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    add sp, sp, #16
-; COMPAT-NEXT:    hint #31
-; COMPAT-NEXT:    sub sp, sp, #16
+; COMPAT-NEXT:    hint #14
+; COMPAT-NEXT:    mov x30, x17
 ; COMPAT-NEXT:    b callee_stack_args
 ;
 ; V83A-LABEL: tail_call_fpdiff_b_key:
@@ -185,12 +188,13 @@ define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-pro
 ; V83A-NEXT:    mov w7, #8 // =0x8
 ; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
 ; V83A-NEXT:    add sp, sp, #32
-; V83A-NEXT:    adrp x16, .Ltmp1
-; V83A-NEXT:    add x16, x16, :lo12:.Ltmp1
+; V83A-NEXT:    mov x17, x30
+; V83A-NEXT:    add x16, sp, #16
+; V83A-NEXT:    adrp x15, .Ltmp1
+; V83A-NEXT:    add x15, x15, :lo12:.Ltmp1
 ; V83A-NEXT:    hint #39
-; V83A-NEXT:    add sp, sp, #16
-; V83A-NEXT:    autibsp
-; V83A-NEXT:    sub sp, sp, #16
+; V83A-NEXT:    autib1716
+; V83A-NEXT:    mov x30, x17
 ; V83A-NEXT:    b callee_stack_args
 ;
 ; PAUTHLR-LABEL: tail_call_fpdiff_b_key:

>From 52aa08150f31f37f09c26dfd9d00a06251a3fcd8 Mon Sep 17 00:00:00 2001
From: Oskar Wirga <oskar at wirga.com>
Date: Thu, 2 Apr 2026 18:56:00 -0400
Subject: [PATCH 06/10] Restructure authenticateLR for all PAuthLR x FPDiff
 cases

Put the FPDiff check on the outside and handle the
!MFnI->branchProtectionPAuthLR() case first, so that the remaining
PAuthLR path can simply branch on Subtarget->hasPAuthLR() to select
the new AutOpc.
---
 .../lib/Target/AArch64/AArch64PointerAuth.cpp | 136 +++++++-----------
 1 file changed, 53 insertions(+), 83 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index f0ad4daca5da3..393ce0177e2db 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -191,21 +191,33 @@ void AArch64PointerAuth::authenticateLR(
     auto &AFL = *static_cast<const AArch64FrameLowering *>(
         MF.getSubtarget().getFrameLowering());
     int64_t FPDiff = AFL.getArgumentStackToRestore(MF, MBB);
+    unsigned AutOpc = UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP;
+
+    if (FPDiff != 0) {
+      // Use AUTI[AB]1716 variants: x17=LR, x16=entry_SP.
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+          .addReg(AArch64::XZR)
+          .addReg(AArch64::LR)
+          .addImm(0)
+          .setMIFlag(MachineInstr::FrameDestroy);
+      emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
+                      StackOffset::getFixed(-FPDiff), TII,
+                      MachineInstr::FrameDestroy);
 
-    if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
-      // FEAT_PAuth_LR: use AUTIASPPCi or AUTIA171615 for FPDiff != 0.
-      assert(PACSym && "No PAC instruction to refer to");
-      emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
-      if (FPDiff != 0) {
-        // x17=LR, x16=entry_SP, x15=PACSym. Result in x17 → LR.
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
-            .addReg(AArch64::XZR)
-            .addReg(AArch64::LR)
-            .addImm(0)
+      if (!MFnI->branchProtectionPAuthLR()) {
+        // Plain pac-ret: AUTIA1716, no PACSym needed.
+        AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
+        BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
             .setMIFlag(MachineInstr::FrameDestroy);
-        emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
-                        StackOffset::getFixed(-FPDiff), TII,
-                        MachineInstr::FrameDestroy);
+        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
+      } else {
+        // PAuthLR: materialize PACSym into x15, then authenticate.
+        // With hasPAuthLR: use AUTIA171615 directly.
+        // Without: PACM + AUTIA1716 (PACM makes 1716 behave as 171615
+        // on PAuthLR hardware; on older hardware PACM is a NOP and x15
+        // is ignored).
+        assert(PACSym && "No PAC instruction to refer to");
+        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADRP), AArch64::X15)
             .addSym(PACSym, AArch64II::MO_PAGE)
             .setMIFlag(MachineInstr::FrameDestroy);
@@ -214,88 +226,46 @@ void AArch64PointerAuth::authenticateLR(
             .addSym(PACSym, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
             .addImm(0)
             .setMIFlag(MachineInstr::FrameDestroy);
-        unsigned AutOpc = UseBKey ? AArch64::AUTIB171615 : AArch64::AUTIA171615;
-        BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
-            .setMIFlag(MachineInstr::FrameDestroy);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::LR)
-            .addReg(AArch64::XZR)
-            .addReg(AArch64::X17)
-            .addImm(0)
-            .setMIFlag(MachineInstr::FrameDestroy);
-      } else {
+
+        AutOpc = UseBKey ? AArch64::AUTIB171615 : AArch64::AUTIA171615;
+        if (Subtarget->hasPAuthLR()) {
+          BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
+              .setMIFlag(MachineInstr::FrameDestroy);
+        } else {
+          BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
+              .setMIFlag(MachineInstr::FrameDestroy);
+          AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
+          BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
+              .setMIFlag(MachineInstr::FrameDestroy);
+        }
+      }
+
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::LR)
+          .addReg(AArch64::XZR)
+          .addReg(AArch64::X17)
+          .addImm(0)
+          .setMIFlag(MachineInstr::FrameDestroy);
+    } else {
+      if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
+        assert(PACSym && "No PAC instruction to refer to");
+        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
         BuildMI(MBB, MBBI, DL,
                 TII->get(UseBKey ? AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi))
             .addSym(PACSym)
             .setMIFlag(MachineInstr::FrameDestroy);
-      }
-    } else if (MFnI->branchProtectionPAuthLR()) {
-      // NOP-space PAuthLR compat. PACM makes AUTIA1716 behave as
-      // AUTIA171615 on PAuthLR hardware; on older hardware PACM is a
-      // NOP and x15 is ignored.
-      assert(PACSym && "No PAC instruction to refer to");
-      emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
-      if (FPDiff != 0) {
-        // x17=LR, x16=entry_SP, x15=PACSym. PACM + AUTIA1716.
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
-            .addReg(AArch64::XZR)
-            .addReg(AArch64::LR)
-            .addImm(0)
-            .setMIFlag(MachineInstr::FrameDestroy);
-        emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
-                        StackOffset::getFixed(-FPDiff), TII,
-                        MachineInstr::FrameDestroy);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADRP), AArch64::X15)
-            .addSym(PACSym, AArch64II::MO_PAGE)
-            .setMIFlag(MachineInstr::FrameDestroy);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
-            .addReg(AArch64::X15)
-            .addSym(PACSym, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
-            .addImm(0)
-            .setMIFlag(MachineInstr::FrameDestroy);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
-            .setMIFlag(MachineInstr::FrameDestroy);
-        unsigned AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
-        BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
-            .setMIFlag(MachineInstr::FrameDestroy);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::LR)
-            .addReg(AArch64::XZR)
-            .addReg(AArch64::X17)
-            .addImm(0)
-            .setMIFlag(MachineInstr::FrameDestroy);
-      } else {
+      } else if (MFnI->branchProtectionPAuthLR()) {
+        assert(PACSym && "No PAC instruction to refer to");
         emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
             .setMIFlag(MachineInstr::FrameDestroy);
-        BuildMI(MBB, MBBI, DL,
-                TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
-            .setMIFlag(MachineInstr::FrameDestroy);
-      }
-    } else {
-      // Plain pac-ret: use AUTIA1716 when FPDiff != 0.
-      if (FPDiff != 0) {
-        // x17=LR, x16=entry_SP. No PACSym needed.
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
-            .addReg(AArch64::XZR)
-            .addReg(AArch64::LR)
-            .addImm(0)
-            .setMIFlag(MachineInstr::FrameDestroy);
-        emitFrameOffset(MBB, MBBI, DL, AArch64::X16, AArch64::SP,
-                        StackOffset::getFixed(-FPDiff), TII,
-                        MachineInstr::FrameDestroy);
-        unsigned AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
+        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
         BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
             .setMIFlag(MachineInstr::FrameDestroy);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::LR)
-            .addReg(AArch64::XZR)
-            .addReg(AArch64::X17)
-            .addImm(0)
-            .setMIFlag(MachineInstr::FrameDestroy);
       } else {
-        BuildMI(MBB, MBBI, DL,
-                TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
+        BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
             .setMIFlag(MachineInstr::FrameDestroy);
+        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       }
-      emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
     }
 
     if (NeedsWinCFI) {

>From ba1ce5058d5d058fe56c2ad74dd16ce36eb627c3 Mon Sep 17 00:00:00 2001
From: Oskar Wirga <oskar at wirga.com>
Date: Thu, 2 Apr 2026 19:58:17 -0400
Subject: [PATCH 07/10] Sink AutOpc declarations

Sink each AutOpc declaration all the way down to the call site that uses it.
---
 llvm/lib/Target/AArch64/AArch64PointerAuth.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index 393ce0177e2db..f93bf71fc7381 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -191,7 +191,6 @@ void AArch64PointerAuth::authenticateLR(
     auto &AFL = *static_cast<const AArch64FrameLowering *>(
         MF.getSubtarget().getFrameLowering());
     int64_t FPDiff = AFL.getArgumentStackToRestore(MF, MBB);
-    unsigned AutOpc = UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP;
 
     if (FPDiff != 0) {
       // Use AUTI[AB]1716 variants: x17=LR, x16=entry_SP.
@@ -206,7 +205,7 @@ void AArch64PointerAuth::authenticateLR(
 
       if (!MFnI->branchProtectionPAuthLR()) {
         // Plain pac-ret: AUTIA1716, no PACSym needed.
-        AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
+        unsigned AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
         BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
             .setMIFlag(MachineInstr::FrameDestroy);
         emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
@@ -227,14 +226,15 @@ void AArch64PointerAuth::authenticateLR(
             .addImm(0)
             .setMIFlag(MachineInstr::FrameDestroy);
 
-        AutOpc = UseBKey ? AArch64::AUTIB171615 : AArch64::AUTIA171615;
         if (Subtarget->hasPAuthLR()) {
+          unsigned AutOpc =
+              UseBKey ? AArch64::AUTIB171615 : AArch64::AUTIA171615;
           BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
               .setMIFlag(MachineInstr::FrameDestroy);
         } else {
           BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
               .setMIFlag(MachineInstr::FrameDestroy);
-          AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
+          unsigned AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
           BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
               .setMIFlag(MachineInstr::FrameDestroy);
         }
@@ -259,9 +259,11 @@ void AArch64PointerAuth::authenticateLR(
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
             .setMIFlag(MachineInstr::FrameDestroy);
         emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
+        unsigned AutOpc = UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP;
         BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
             .setMIFlag(MachineInstr::FrameDestroy);
       } else {
+        unsigned AutOpc = UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP;
         BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
             .setMIFlag(MachineInstr::FrameDestroy);
         emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);

>From f3c5ba625da6af216d06c12905520746b2e262cd Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Fri, 3 Apr 2026 11:33:31 -0700
Subject: [PATCH 08/10] Fix missing PAC CFI bug, and rearrange to help
 auditability of the three cases. Turn on async unwind tables in the test to
 catch this.

---
 .../lib/Target/AArch64/AArch64PointerAuth.cpp |  82 +++----
 .../AArch64/pauth-lr-tail-call-fpdiff.ll      | 219 +++++++++++++++++-
 2 files changed, 246 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index f93bf71fc7381..4571f578fc90b 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -58,16 +58,19 @@ FunctionPass *llvm::createAArch64PointerAuthPass() {
 
 char AArch64PointerAuth::ID = 0;
 
-static void emitPACSymOffsetIntoX16(const TargetInstrInfo &TII,
+static void emitPACSymOffsetIntoReg(const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I, DebugLoc DL,
-                                    MCSymbol *PACSym) {
-  BuildMI(MBB, I, DL, TII.get(AArch64::ADRP), AArch64::X16)
-      .addSym(PACSym, AArch64II::MO_PAGE);
-  BuildMI(MBB, I, DL, TII.get(AArch64::ADDXri), AArch64::X16)
-      .addReg(AArch64::X16)
+                                    MCSymbol *PACSym,
+                                    Register Reg) {
+  BuildMI(MBB, I, DL, TII.get(AArch64::ADRP), Reg)
+      .addSym(PACSym, AArch64II::MO_PAGE)
+      .setMIFlag(MachineInstr::FrameDestroy);
+  BuildMI(MBB, I, DL, TII.get(AArch64::ADDXri), Reg)
+      .addReg(Reg)
       .addSym(PACSym, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
-      .addImm(0);
+      .addImm(0)
+      .setMIFlag(MachineInstr::FrameDestroy);
 }
 
 static void emitPACCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
@@ -173,7 +176,7 @@ void AArch64PointerAuth::authenticateLR(
           .setMIFlag(MachineInstr::FrameDestroy);
     } else {
       if (MFnI->branchProtectionPAuthLR()) {
-        emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
+        emitPACSymOffsetIntoReg(*TII, MBB, MBBI, DL, PACSym, AArch64::X16);
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
             .setMIFlag(MachineInstr::FrameDestroy);
       }
@@ -183,16 +186,12 @@ void AArch64PointerAuth::authenticateLR(
     }
     MBB.erase(TI);
   } else {
-    // When FPDiff != 0 (tail call with different stack arg space), SP has
+    // When FPDiff != 0 (tail call with callee-popped stack arg space), SP has
     // been adjusted and no longer matches the entry SP used as the signing
     // modifier. We must reconstruct entry SP for authentication.
-    // The three signing modes (hasPAuthLR, NOP-space PAuthLR, plain pac-ret)
-    // each need different handling — see inline comments below.
     auto &AFL = *static_cast<const AArch64FrameLowering *>(
         MF.getSubtarget().getFrameLowering());
-    int64_t FPDiff = AFL.getArgumentStackToRestore(MF, MBB);
-
-    if (FPDiff != 0) {
+    if (int64_t FPDiff = AFL.getArgumentStackToRestore(MF, MBB)) {
       // Use AUTI[AB]1716 variants: x17=LR, x16=entry_SP.
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
           .addReg(AArch64::XZR)
@@ -203,41 +202,34 @@ void AArch64PointerAuth::authenticateLR(
                       StackOffset::getFixed(-FPDiff), TII,
                       MachineInstr::FrameDestroy);
 
-      if (!MFnI->branchProtectionPAuthLR()) {
-        // Plain pac-ret: AUTIA1716, no PACSym needed.
+      if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
+        assert(PACSym && "No PAC instruction to refer to");
+        emitPACSymOffsetIntoReg(*TII, MBB, MBBI, DL, PACSym, AArch64::X15);
+
+        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
+        unsigned AutOpc = UseBKey ? AArch64::AUTIB171615 : AArch64::AUTIA171615;
+        BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
+            .setMIFlag(MachineInstr::FrameDestroy);
+      } else if (MFnI->branchProtectionPAuthLR()) {
+        assert(PACSym && "No PAC instruction to refer to");
+        emitPACSymOffsetIntoReg(*TII, MBB, MBBI, DL, PACSym, AArch64::X15);
+
+        // The PACM hint-space instruction modifies the following AUTI[AB]1716
+        // to optionally take x15 as an extra operand depending on the
+        // presence of +pauth-lr at runtime. On machines without +pauth-lr, it
+        // behaves as a nop, and the address of the PACI[AB]SP in x15 is ignored.
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
+            .setMIFlag(MachineInstr::FrameDestroy);
+
         unsigned AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
         BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
             .setMIFlag(MachineInstr::FrameDestroy);
         emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       } else {
-        // PAuthLR: materialize PACSym into x15, then authenticate.
-        // With hasPAuthLR: use AUTIA171615 directly.
-        // Without: PACM + AUTIA1716 (PACM makes 1716 behave as 171615
-        // on PAuthLR hardware; on older hardware PACM is a NOP and x15
-        // is ignored).
-        assert(PACSym && "No PAC instruction to refer to");
-        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADRP), AArch64::X15)
-            .addSym(PACSym, AArch64II::MO_PAGE)
-            .setMIFlag(MachineInstr::FrameDestroy);
-        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
-            .addReg(AArch64::X15)
-            .addSym(PACSym, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
-            .addImm(0)
+        unsigned AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
+        BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
             .setMIFlag(MachineInstr::FrameDestroy);
-
-        if (Subtarget->hasPAuthLR()) {
-          unsigned AutOpc =
-              UseBKey ? AArch64::AUTIB171615 : AArch64::AUTIA171615;
-          BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
-              .setMIFlag(MachineInstr::FrameDestroy);
-        } else {
-          BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
-              .setMIFlag(MachineInstr::FrameDestroy);
-          unsigned AutOpc = UseBKey ? AArch64::AUTIB1716 : AArch64::AUTIA1716;
-          BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
-              .setMIFlag(MachineInstr::FrameDestroy);
-        }
+        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       }
 
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::LR)
@@ -255,13 +247,13 @@ void AArch64PointerAuth::authenticateLR(
             .setMIFlag(MachineInstr::FrameDestroy);
       } else if (MFnI->branchProtectionPAuthLR()) {
         assert(PACSym && "No PAC instruction to refer to");
-        emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
+        emitPACSymOffsetIntoReg(*TII, MBB, MBBI, DL, PACSym, AArch64::X16);
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
             .setMIFlag(MachineInstr::FrameDestroy);
-        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
         unsigned AutOpc = UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP;
         BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
             .setMIFlag(MachineInstr::FrameDestroy);
+        emitPACCFI(MBB, MBBI, MachineInstr::FrameDestroy, EmitAsyncCFI);
       } else {
         unsigned AutOpc = UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP;
         BuildMI(MBB, MBBI, DL, TII->get(AutOpc))
diff --git a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
index 41d578e4b9a2e..bfb865ac71b2d 100644
--- a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
+++ b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
@@ -2,6 +2,9 @@
 ; RUN: llc -mtriple=aarch64              < %s | FileCheck --check-prefixes=CHECK,COMPAT %s
 ; RUN: llc -mtriple=aarch64 -mattr=v8.3a < %s | FileCheck --check-prefixes=CHECK,V83A %s
 ; RUN: llc -mtriple=aarch64 -mattr=v9a -mattr=pauth-lr < %s | FileCheck --check-prefixes=PAUTHLR %s
+; RUN: sed 's/"branch-protection-pauth-lr" //g' %s \
+; RUN:   | llc -mtriple=aarch64 -mattr=v9a \
+; RUN:   | FileCheck --check-prefixes=NOPAUTHLR %s
 ;
 ; Test tail calls with return address signing across all three modes
 ; (COMPAT, V83A, PAUTHLR) and both FPDiff==0 and FPDiff!=0 cases,
@@ -11,7 +14,7 @@ declare swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64, i64, i64,
 declare swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
 
 ; FPDiff != 0, A-key: callee has stack args that this function doesn't.
-define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" {
+define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" uwtable(async) {
 ; COMPAT-LABEL: tail_call_fpdiff_a_key:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #39
@@ -20,6 +23,7 @@ define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-pro
 ; COMPAT-NEXT:    hint #25
 ; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
 ; COMPAT-NEXT:    sub sp, sp, #48
+; COMPAT-NEXT:    .cfi_def_cfa_offset 48
 ; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; COMPAT-NEXT:    str x22, [sp, #8]
 ; COMPAT-NEXT:    add x29, sp, #16
@@ -32,19 +36,24 @@ define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-pro
 ; COMPAT-NEXT:    str x8, [x29, #16]
 ; COMPAT-NEXT:    mov w2, #3 // =0x3
 ; COMPAT-NEXT:    mov w3, #4 // =0x4
-; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; COMPAT-NEXT:    mov w4, #5 // =0x5
 ; COMPAT-NEXT:    mov w5, #6 // =0x6
 ; COMPAT-NEXT:    mov w6, #7 // =0x7
 ; COMPAT-NEXT:    mov w7, #8 // =0x8
+; COMPAT-NEXT:    .cfi_def_cfa wsp, 48
+; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
 ; COMPAT-NEXT:    add sp, sp, #32
+; COMPAT-NEXT:    .cfi_def_cfa_offset 16
+; COMPAT-NEXT:    .cfi_restore w30
+; COMPAT-NEXT:    .cfi_restore w29
 ; COMPAT-NEXT:    mov x17, x30
 ; COMPAT-NEXT:    add x16, sp, #16
 ; COMPAT-NEXT:    adrp x15, .Ltmp0
 ; COMPAT-NEXT:    add x15, x15, :lo12:.Ltmp0
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #12
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    mov x30, x17
 ; COMPAT-NEXT:    b callee_stack_args
 ;
@@ -56,6 +65,7 @@ define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-pro
 ; V83A-NEXT:    paciasp
 ; V83A-NEXT:    orr x29, x29, #0x1000000000000000
 ; V83A-NEXT:    sub sp, sp, #48
+; V83A-NEXT:    .cfi_def_cfa_offset 48
 ; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; V83A-NEXT:    str x22, [sp, #8]
 ; V83A-NEXT:    add x29, sp, #16
@@ -68,19 +78,24 @@ define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-pro
 ; V83A-NEXT:    str x8, [x29, #16]
 ; V83A-NEXT:    mov w2, #3 // =0x3
 ; V83A-NEXT:    mov w3, #4 // =0x4
-; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; V83A-NEXT:    mov w4, #5 // =0x5
 ; V83A-NEXT:    mov w5, #6 // =0x6
 ; V83A-NEXT:    mov w6, #7 // =0x7
 ; V83A-NEXT:    mov w7, #8 // =0x8
+; V83A-NEXT:    .cfi_def_cfa wsp, 48
+; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
 ; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_restore w30
+; V83A-NEXT:    .cfi_restore w29
 ; V83A-NEXT:    mov x17, x30
 ; V83A-NEXT:    add x16, sp, #16
 ; V83A-NEXT:    adrp x15, .Ltmp0
 ; V83A-NEXT:    add x15, x15, :lo12:.Ltmp0
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    autia1716
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    mov x30, x17
 ; V83A-NEXT:    b callee_stack_args
 ;
@@ -91,6 +106,7 @@ define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-pro
 ; PAUTHLR-NEXT:    paciasppc
 ; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
 ; PAUTHLR-NEXT:    sub sp, sp, #48
+; PAUTHLR-NEXT:    .cfi_def_cfa_offset 48
 ; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; PAUTHLR-NEXT:    str x22, [sp, #8]
 ; PAUTHLR-NEXT:    add x29, sp, #16
@@ -103,26 +119,68 @@ define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-pro
 ; PAUTHLR-NEXT:    str x8, [x29, #16]
 ; PAUTHLR-NEXT:    mov w2, #3 // =0x3
 ; PAUTHLR-NEXT:    mov w3, #4 // =0x4
-; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; PAUTHLR-NEXT:    mov w4, #5 // =0x5
 ; PAUTHLR-NEXT:    mov w5, #6 // =0x6
 ; PAUTHLR-NEXT:    mov w6, #7 // =0x7
 ; PAUTHLR-NEXT:    mov w7, #8 // =0x8
+; PAUTHLR-NEXT:    .cfi_def_cfa wsp, 48
+; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
 ; PAUTHLR-NEXT:    add sp, sp, #32
+; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
+; PAUTHLR-NEXT:    .cfi_restore w30
+; PAUTHLR-NEXT:    .cfi_restore w29
 ; PAUTHLR-NEXT:    mov x17, x30
 ; PAUTHLR-NEXT:    add x16, sp, #16
 ; PAUTHLR-NEXT:    adrp x15, .Ltmp0
 ; PAUTHLR-NEXT:    add x15, x15, :lo12:.Ltmp0
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    autia171615
 ; PAUTHLR-NEXT:    mov x30, x17
 ; PAUTHLR-NEXT:    b callee_stack_args
+;
+; NOPAUTHLR-LABEL: tail_call_fpdiff_a_key:
+; NOPAUTHLR:       // %bb.0:
+; NOPAUTHLR-NEXT:    paciasp
+; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
+; NOPAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
+; NOPAUTHLR-NEXT:    sub sp, sp, #48
+; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 48
+; NOPAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; NOPAUTHLR-NEXT:    str x22, [sp, #8]
+; NOPAUTHLR-NEXT:    add x29, sp, #16
+; NOPAUTHLR-NEXT:    .cfi_def_cfa w29, 32
+; NOPAUTHLR-NEXT:    .cfi_offset w30, -24
+; NOPAUTHLR-NEXT:    .cfi_offset w29, -32
+; NOPAUTHLR-NEXT:    mov w8, #9 // =0x9
+; NOPAUTHLR-NEXT:    mov w0, #1 // =0x1
+; NOPAUTHLR-NEXT:    mov w1, #2 // =0x2
+; NOPAUTHLR-NEXT:    str x8, [x29, #16]
+; NOPAUTHLR-NEXT:    mov w2, #3 // =0x3
+; NOPAUTHLR-NEXT:    mov w3, #4 // =0x4
+; NOPAUTHLR-NEXT:    mov w4, #5 // =0x5
+; NOPAUTHLR-NEXT:    mov w5, #6 // =0x6
+; NOPAUTHLR-NEXT:    mov w6, #7 // =0x7
+; NOPAUTHLR-NEXT:    mov w7, #8 // =0x8
+; NOPAUTHLR-NEXT:    .cfi_def_cfa wsp, 48
+; NOPAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; NOPAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
+; NOPAUTHLR-NEXT:    add sp, sp, #32
+; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 16
+; NOPAUTHLR-NEXT:    .cfi_restore w30
+; NOPAUTHLR-NEXT:    .cfi_restore w29
+; NOPAUTHLR-NEXT:    mov x17, x30
+; NOPAUTHLR-NEXT:    add x16, sp, #16
+; NOPAUTHLR-NEXT:    autia1716
+; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
+; NOPAUTHLR-NEXT:    mov x30, x17
+; NOPAUTHLR-NEXT:    b callee_stack_args
   musttail call swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
   ret void
 }
 
 ; FPDiff != 0, B-key: callee has stack args that this function doesn't.
-define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "sign-return-address-key"="b_key" "frame-pointer"="all" {
+define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "sign-return-address-key"="b_key" "frame-pointer"="all" uwtable(async) {
 ; COMPAT-LABEL: tail_call_fpdiff_b_key:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    .cfi_b_key_frame
@@ -132,6 +190,7 @@ define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-pro
 ; COMPAT-NEXT:    hint #27
 ; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
 ; COMPAT-NEXT:    sub sp, sp, #48
+; COMPAT-NEXT:    .cfi_def_cfa_offset 48
 ; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; COMPAT-NEXT:    str x22, [sp, #8]
 ; COMPAT-NEXT:    add x29, sp, #16
@@ -144,19 +203,24 @@ define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-pro
 ; COMPAT-NEXT:    str x8, [x29, #16]
 ; COMPAT-NEXT:    mov w2, #3 // =0x3
 ; COMPAT-NEXT:    mov w3, #4 // =0x4
-; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; COMPAT-NEXT:    mov w4, #5 // =0x5
 ; COMPAT-NEXT:    mov w5, #6 // =0x6
 ; COMPAT-NEXT:    mov w6, #7 // =0x7
 ; COMPAT-NEXT:    mov w7, #8 // =0x8
+; COMPAT-NEXT:    .cfi_def_cfa wsp, 48
+; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
 ; COMPAT-NEXT:    add sp, sp, #32
+; COMPAT-NEXT:    .cfi_def_cfa_offset 16
+; COMPAT-NEXT:    .cfi_restore w30
+; COMPAT-NEXT:    .cfi_restore w29
 ; COMPAT-NEXT:    mov x17, x30
 ; COMPAT-NEXT:    add x16, sp, #16
 ; COMPAT-NEXT:    adrp x15, .Ltmp1
 ; COMPAT-NEXT:    add x15, x15, :lo12:.Ltmp1
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #14
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    mov x30, x17
 ; COMPAT-NEXT:    b callee_stack_args
 ;
@@ -169,6 +233,7 @@ define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-pro
 ; V83A-NEXT:    pacibsp
 ; V83A-NEXT:    orr x29, x29, #0x1000000000000000
 ; V83A-NEXT:    sub sp, sp, #48
+; V83A-NEXT:    .cfi_def_cfa_offset 48
 ; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; V83A-NEXT:    str x22, [sp, #8]
 ; V83A-NEXT:    add x29, sp, #16
@@ -181,19 +246,24 @@ define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-pro
 ; V83A-NEXT:    str x8, [x29, #16]
 ; V83A-NEXT:    mov w2, #3 // =0x3
 ; V83A-NEXT:    mov w3, #4 // =0x4
-; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; V83A-NEXT:    mov w4, #5 // =0x5
 ; V83A-NEXT:    mov w5, #6 // =0x6
 ; V83A-NEXT:    mov w6, #7 // =0x7
 ; V83A-NEXT:    mov w7, #8 // =0x8
+; V83A-NEXT:    .cfi_def_cfa wsp, 48
+; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
 ; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_restore w30
+; V83A-NEXT:    .cfi_restore w29
 ; V83A-NEXT:    mov x17, x30
 ; V83A-NEXT:    add x16, sp, #16
 ; V83A-NEXT:    adrp x15, .Ltmp1
 ; V83A-NEXT:    add x15, x15, :lo12:.Ltmp1
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    autib1716
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    mov x30, x17
 ; V83A-NEXT:    b callee_stack_args
 ;
@@ -205,6 +275,7 @@ define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-pro
 ; PAUTHLR-NEXT:    pacibsppc
 ; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
 ; PAUTHLR-NEXT:    sub sp, sp, #48
+; PAUTHLR-NEXT:    .cfi_def_cfa_offset 48
 ; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; PAUTHLR-NEXT:    str x22, [sp, #8]
 ; PAUTHLR-NEXT:    add x29, sp, #16
@@ -217,26 +288,69 @@ define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-pro
 ; PAUTHLR-NEXT:    str x8, [x29, #16]
 ; PAUTHLR-NEXT:    mov w2, #3 // =0x3
 ; PAUTHLR-NEXT:    mov w3, #4 // =0x4
-; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; PAUTHLR-NEXT:    mov w4, #5 // =0x5
 ; PAUTHLR-NEXT:    mov w5, #6 // =0x6
 ; PAUTHLR-NEXT:    mov w6, #7 // =0x7
 ; PAUTHLR-NEXT:    mov w7, #8 // =0x8
+; PAUTHLR-NEXT:    .cfi_def_cfa wsp, 48
+; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
 ; PAUTHLR-NEXT:    add sp, sp, #32
+; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
+; PAUTHLR-NEXT:    .cfi_restore w30
+; PAUTHLR-NEXT:    .cfi_restore w29
 ; PAUTHLR-NEXT:    mov x17, x30
 ; PAUTHLR-NEXT:    add x16, sp, #16
 ; PAUTHLR-NEXT:    adrp x15, .Ltmp1
 ; PAUTHLR-NEXT:    add x15, x15, :lo12:.Ltmp1
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    autib171615
 ; PAUTHLR-NEXT:    mov x30, x17
 ; PAUTHLR-NEXT:    b callee_stack_args
+;
+; NOPAUTHLR-LABEL: tail_call_fpdiff_b_key:
+; NOPAUTHLR:       // %bb.0:
+; NOPAUTHLR-NEXT:    .cfi_b_key_frame
+; NOPAUTHLR-NEXT:    pacibsp
+; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
+; NOPAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
+; NOPAUTHLR-NEXT:    sub sp, sp, #48
+; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 48
+; NOPAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; NOPAUTHLR-NEXT:    str x22, [sp, #8]
+; NOPAUTHLR-NEXT:    add x29, sp, #16
+; NOPAUTHLR-NEXT:    .cfi_def_cfa w29, 32
+; NOPAUTHLR-NEXT:    .cfi_offset w30, -24
+; NOPAUTHLR-NEXT:    .cfi_offset w29, -32
+; NOPAUTHLR-NEXT:    mov w8, #9 // =0x9
+; NOPAUTHLR-NEXT:    mov w0, #1 // =0x1
+; NOPAUTHLR-NEXT:    mov w1, #2 // =0x2
+; NOPAUTHLR-NEXT:    str x8, [x29, #16]
+; NOPAUTHLR-NEXT:    mov w2, #3 // =0x3
+; NOPAUTHLR-NEXT:    mov w3, #4 // =0x4
+; NOPAUTHLR-NEXT:    mov w4, #5 // =0x5
+; NOPAUTHLR-NEXT:    mov w5, #6 // =0x6
+; NOPAUTHLR-NEXT:    mov w6, #7 // =0x7
+; NOPAUTHLR-NEXT:    mov w7, #8 // =0x8
+; NOPAUTHLR-NEXT:    .cfi_def_cfa wsp, 48
+; NOPAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; NOPAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
+; NOPAUTHLR-NEXT:    add sp, sp, #32
+; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 16
+; NOPAUTHLR-NEXT:    .cfi_restore w30
+; NOPAUTHLR-NEXT:    .cfi_restore w29
+; NOPAUTHLR-NEXT:    mov x17, x30
+; NOPAUTHLR-NEXT:    add x16, sp, #16
+; NOPAUTHLR-NEXT:    autib1716
+; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
+; NOPAUTHLR-NEXT:    mov x30, x17
+; NOPAUTHLR-NEXT:    b callee_stack_args
   musttail call swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
   ret void
 }
 
 ; FPDiff == 0, A-key: callee has same calling convention, no extra stack args.
-define swifttailcc void @tail_call_no_fpdiff_a_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" {
+define swifttailcc void @tail_call_no_fpdiff_a_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" uwtable(async) {
 ; COMPAT-LABEL: tail_call_no_fpdiff_a_key:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #39
@@ -245,19 +359,25 @@ define swifttailcc void @tail_call_no_fpdiff_a_key(ptr swiftasync %ctx) "branch-
 ; COMPAT-NEXT:    hint #25
 ; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
 ; COMPAT-NEXT:    sub sp, sp, #32
+; COMPAT-NEXT:    .cfi_def_cfa_offset 32
 ; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; COMPAT-NEXT:    str x22, [sp, #8]
 ; COMPAT-NEXT:    add x29, sp, #16
 ; COMPAT-NEXT:    .cfi_def_cfa w29, 16
 ; COMPAT-NEXT:    .cfi_offset w30, -8
 ; COMPAT-NEXT:    .cfi_offset w29, -16
+; COMPAT-NEXT:    .cfi_def_cfa wsp, 32
 ; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
 ; COMPAT-NEXT:    add sp, sp, #32
+; COMPAT-NEXT:    .cfi_def_cfa_offset 0
+; COMPAT-NEXT:    .cfi_restore w30
+; COMPAT-NEXT:    .cfi_restore w29
 ; COMPAT-NEXT:    adrp x16, .Ltmp2
 ; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp2
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    b callee_no_stack_args
 ;
 ; V83A-LABEL: tail_call_no_fpdiff_a_key:
@@ -268,19 +388,25 @@ define swifttailcc void @tail_call_no_fpdiff_a_key(ptr swiftasync %ctx) "branch-
 ; V83A-NEXT:    paciasp
 ; V83A-NEXT:    orr x29, x29, #0x1000000000000000
 ; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 32
 ; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; V83A-NEXT:    str x22, [sp, #8]
 ; V83A-NEXT:    add x29, sp, #16
 ; V83A-NEXT:    .cfi_def_cfa w29, 16
 ; V83A-NEXT:    .cfi_offset w30, -8
 ; V83A-NEXT:    .cfi_offset w29, -16
+; V83A-NEXT:    .cfi_def_cfa wsp, 32
 ; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
 ; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 0
+; V83A-NEXT:    .cfi_restore w30
+; V83A-NEXT:    .cfi_restore w29
 ; V83A-NEXT:    adrp x16, .Ltmp2
 ; V83A-NEXT:    add x16, x16, :lo12:.Ltmp2
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    autiasp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    b callee_no_stack_args
 ;
 ; PAUTHLR-LABEL: tail_call_no_fpdiff_a_key:
@@ -290,23 +416,53 @@ define swifttailcc void @tail_call_no_fpdiff_a_key(ptr swiftasync %ctx) "branch-
 ; PAUTHLR-NEXT:    paciasppc
 ; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
 ; PAUTHLR-NEXT:    sub sp, sp, #32
+; PAUTHLR-NEXT:    .cfi_def_cfa_offset 32
 ; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; PAUTHLR-NEXT:    str x22, [sp, #8]
 ; PAUTHLR-NEXT:    add x29, sp, #16
 ; PAUTHLR-NEXT:    .cfi_def_cfa w29, 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -8
 ; PAUTHLR-NEXT:    .cfi_offset w29, -16
+; PAUTHLR-NEXT:    .cfi_def_cfa wsp, 32
 ; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
 ; PAUTHLR-NEXT:    add sp, sp, #32
+; PAUTHLR-NEXT:    .cfi_def_cfa_offset 0
+; PAUTHLR-NEXT:    .cfi_restore w30
+; PAUTHLR-NEXT:    .cfi_restore w29
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    autiasppc .Ltmp2
 ; PAUTHLR-NEXT:    b callee_no_stack_args
+;
+; NOPAUTHLR-LABEL: tail_call_no_fpdiff_a_key:
+; NOPAUTHLR:       // %bb.0:
+; NOPAUTHLR-NEXT:    paciasp
+; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
+; NOPAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
+; NOPAUTHLR-NEXT:    sub sp, sp, #32
+; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 32
+; NOPAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; NOPAUTHLR-NEXT:    str x22, [sp, #8]
+; NOPAUTHLR-NEXT:    add x29, sp, #16
+; NOPAUTHLR-NEXT:    .cfi_def_cfa w29, 16
+; NOPAUTHLR-NEXT:    .cfi_offset w30, -8
+; NOPAUTHLR-NEXT:    .cfi_offset w29, -16
+; NOPAUTHLR-NEXT:    .cfi_def_cfa wsp, 32
+; NOPAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; NOPAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
+; NOPAUTHLR-NEXT:    add sp, sp, #32
+; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 0
+; NOPAUTHLR-NEXT:    .cfi_restore w30
+; NOPAUTHLR-NEXT:    .cfi_restore w29
+; NOPAUTHLR-NEXT:    autiasp
+; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
+; NOPAUTHLR-NEXT:    b callee_no_stack_args
   musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
   ret void
 }
 
 ; FPDiff == 0, B-key: callee has same calling convention, no extra stack args.
-define swifttailcc void @tail_call_no_fpdiff_b_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "sign-return-address-key"="b_key" "frame-pointer"="all" {
+define swifttailcc void @tail_call_no_fpdiff_b_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "sign-return-address-key"="b_key" "frame-pointer"="all" uwtable(async) {
 ; COMPAT-LABEL: tail_call_no_fpdiff_b_key:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    .cfi_b_key_frame
@@ -316,19 +472,25 @@ define swifttailcc void @tail_call_no_fpdiff_b_key(ptr swiftasync %ctx) "branch-
 ; COMPAT-NEXT:    hint #27
 ; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
 ; COMPAT-NEXT:    sub sp, sp, #32
+; COMPAT-NEXT:    .cfi_def_cfa_offset 32
 ; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; COMPAT-NEXT:    str x22, [sp, #8]
 ; COMPAT-NEXT:    add x29, sp, #16
 ; COMPAT-NEXT:    .cfi_def_cfa w29, 16
 ; COMPAT-NEXT:    .cfi_offset w30, -8
 ; COMPAT-NEXT:    .cfi_offset w29, -16
+; COMPAT-NEXT:    .cfi_def_cfa wsp, 32
 ; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
 ; COMPAT-NEXT:    add sp, sp, #32
+; COMPAT-NEXT:    .cfi_def_cfa_offset 0
+; COMPAT-NEXT:    .cfi_restore w30
+; COMPAT-NEXT:    .cfi_restore w29
 ; COMPAT-NEXT:    adrp x16, .Ltmp3
 ; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp3
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #31
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    b callee_no_stack_args
 ;
 ; V83A-LABEL: tail_call_no_fpdiff_b_key:
@@ -340,19 +502,25 @@ define swifttailcc void @tail_call_no_fpdiff_b_key(ptr swiftasync %ctx) "branch-
 ; V83A-NEXT:    pacibsp
 ; V83A-NEXT:    orr x29, x29, #0x1000000000000000
 ; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 32
 ; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; V83A-NEXT:    str x22, [sp, #8]
 ; V83A-NEXT:    add x29, sp, #16
 ; V83A-NEXT:    .cfi_def_cfa w29, 16
 ; V83A-NEXT:    .cfi_offset w30, -8
 ; V83A-NEXT:    .cfi_offset w29, -16
+; V83A-NEXT:    .cfi_def_cfa wsp, 32
 ; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
 ; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 0
+; V83A-NEXT:    .cfi_restore w30
+; V83A-NEXT:    .cfi_restore w29
 ; V83A-NEXT:    adrp x16, .Ltmp3
 ; V83A-NEXT:    add x16, x16, :lo12:.Ltmp3
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    autibsp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    b callee_no_stack_args
 ;
 ; PAUTHLR-LABEL: tail_call_no_fpdiff_b_key:
@@ -363,17 +531,48 @@ define swifttailcc void @tail_call_no_fpdiff_b_key(ptr swiftasync %ctx) "branch-
 ; PAUTHLR-NEXT:    pacibsppc
 ; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
 ; PAUTHLR-NEXT:    sub sp, sp, #32
+; PAUTHLR-NEXT:    .cfi_def_cfa_offset 32
 ; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
 ; PAUTHLR-NEXT:    str x22, [sp, #8]
 ; PAUTHLR-NEXT:    add x29, sp, #16
 ; PAUTHLR-NEXT:    .cfi_def_cfa w29, 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -8
 ; PAUTHLR-NEXT:    .cfi_offset w29, -16
+; PAUTHLR-NEXT:    .cfi_def_cfa wsp, 32
 ; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
 ; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
 ; PAUTHLR-NEXT:    add sp, sp, #32
+; PAUTHLR-NEXT:    .cfi_def_cfa_offset 0
+; PAUTHLR-NEXT:    .cfi_restore w30
+; PAUTHLR-NEXT:    .cfi_restore w29
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    autibsppc .Ltmp3
 ; PAUTHLR-NEXT:    b callee_no_stack_args
+;
+; NOPAUTHLR-LABEL: tail_call_no_fpdiff_b_key:
+; NOPAUTHLR:       // %bb.0:
+; NOPAUTHLR-NEXT:    .cfi_b_key_frame
+; NOPAUTHLR-NEXT:    pacibsp
+; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
+; NOPAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
+; NOPAUTHLR-NEXT:    sub sp, sp, #32
+; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 32
+; NOPAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; NOPAUTHLR-NEXT:    str x22, [sp, #8]
+; NOPAUTHLR-NEXT:    add x29, sp, #16
+; NOPAUTHLR-NEXT:    .cfi_def_cfa w29, 16
+; NOPAUTHLR-NEXT:    .cfi_offset w30, -8
+; NOPAUTHLR-NEXT:    .cfi_offset w29, -16
+; NOPAUTHLR-NEXT:    .cfi_def_cfa wsp, 32
+; NOPAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; NOPAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
+; NOPAUTHLR-NEXT:    add sp, sp, #32
+; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 0
+; NOPAUTHLR-NEXT:    .cfi_restore w30
+; NOPAUTHLR-NEXT:    .cfi_restore w29
+; NOPAUTHLR-NEXT:    autibsp
+; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
+; NOPAUTHLR-NEXT:    b callee_no_stack_args
   musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
   ret void
 }

>From 034d844a26cf4f8674f9cbc554960ecee7f0f0f4 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Fri, 3 Apr 2026 11:45:15 -0700
Subject: [PATCH 09/10] clang-format

---
 llvm/lib/Target/AArch64/AArch64PointerAuth.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index 4571f578fc90b..5147557341467 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -61,8 +61,7 @@ char AArch64PointerAuth::ID = 0;
 static void emitPACSymOffsetIntoReg(const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I, DebugLoc DL,
-                                    MCSymbol *PACSym,
-                                    Register Reg) {
+                                    MCSymbol *PACSym, Register Reg) {
   BuildMI(MBB, I, DL, TII.get(AArch64::ADRP), Reg)
       .addSym(PACSym, AArch64II::MO_PAGE)
       .setMIFlag(MachineInstr::FrameDestroy);
@@ -217,7 +216,8 @@ void AArch64PointerAuth::authenticateLR(
         // The PACM hint-space instruction modifies the following AUTI[AB]1716
         // to optionally take x15 as an extra operand depending on the
         // presence of +pauth-lr at runtime. On machines without +pauth-lr, it
-        // behaves as a nop, and the address of the PACI[AB]SP in x15 is ignored.
+        // behaves as a nop, and the address of the PACI[AB]SP in x15 is
+        // ignored.
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM))
             .setMIFlag(MachineInstr::FrameDestroy);
 

>From 9189c95132175aedaa66eda88ae3979557b8a061 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Fri, 3 Apr 2026 16:47:10 -0700
Subject: [PATCH 10/10] do manual check lines, since UTC on this test is too
 verbose

---
 .../AArch64/pauth-lr-tail-call-fpdiff.ll      | 813 ++++++------------
 1 file changed, 263 insertions(+), 550 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
index bfb865ac71b2d..4c1eeb7544fd4 100644
--- a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
+++ b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll
@@ -1,580 +1,293 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=aarch64              < %s | FileCheck --check-prefixes=CHECK,COMPAT %s
 ; RUN: llc -mtriple=aarch64 -mattr=v8.3a < %s | FileCheck --check-prefixes=CHECK,V83A %s
-; RUN: llc -mtriple=aarch64 -mattr=v9a -mattr=pauth-lr < %s | FileCheck --check-prefixes=PAUTHLR %s
+; RUN: llc -mtriple=aarch64 -mattr=v9a -mattr=pauth-lr < %s | FileCheck --check-prefixes=CHECK,V9A %s
 ; RUN: sed 's/"branch-protection-pauth-lr" //g' %s \
 ; RUN:   | llc -mtriple=aarch64 -mattr=v9a \
-; RUN:   | FileCheck --check-prefixes=NOPAUTHLR %s
+; RUN:   | FileCheck --check-prefixes=CHECK,PAUTH %s
 ;
 ; Test tail calls with return address signing across all three modes
-; (COMPAT, V83A, PAUTHLR) and both FPDiff==0 and FPDiff!=0 cases,
+; (COMPAT, V83A, V9A) and both FPDiff==0 and FPDiff!=0 cases,
 ; with A-key and B-key variants.
 
 declare swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64, i64, i64, i64, i64, i64, i64, i64, i64)
 declare swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
 
-; FPDiff != 0, A-key: callee has stack args that this function doesn't.
+; FPDiff != 0, A-key:     callee has stack args that this function doesn't.
 define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" uwtable(async) {
-; COMPAT-LABEL: tail_call_fpdiff_a_key:
-; COMPAT:       // %bb.0:
-; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
-; COMPAT-NEXT:  .Ltmp0:
-; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
-; COMPAT-NEXT:    sub sp, sp, #48
-; COMPAT-NEXT:    .cfi_def_cfa_offset 48
-; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; COMPAT-NEXT:    str x22, [sp, #8]
-; COMPAT-NEXT:    add x29, sp, #16
-; COMPAT-NEXT:    .cfi_def_cfa w29, 32
-; COMPAT-NEXT:    .cfi_offset w30, -24
-; COMPAT-NEXT:    .cfi_offset w29, -32
-; COMPAT-NEXT:    mov w8, #9 // =0x9
-; COMPAT-NEXT:    mov w0, #1 // =0x1
-; COMPAT-NEXT:    mov w1, #2 // =0x2
-; COMPAT-NEXT:    str x8, [x29, #16]
-; COMPAT-NEXT:    mov w2, #3 // =0x3
-; COMPAT-NEXT:    mov w3, #4 // =0x4
-; COMPAT-NEXT:    mov w4, #5 // =0x5
-; COMPAT-NEXT:    mov w5, #6 // =0x6
-; COMPAT-NEXT:    mov w6, #7 // =0x7
-; COMPAT-NEXT:    mov w7, #8 // =0x8
-; COMPAT-NEXT:    .cfi_def_cfa wsp, 48
-; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
-; COMPAT-NEXT:    add sp, sp, #32
-; COMPAT-NEXT:    .cfi_def_cfa_offset 16
-; COMPAT-NEXT:    .cfi_restore w30
-; COMPAT-NEXT:    .cfi_restore w29
-; COMPAT-NEXT:    mov x17, x30
-; COMPAT-NEXT:    add x16, sp, #16
-; COMPAT-NEXT:    adrp x15, .Ltmp0
-; COMPAT-NEXT:    add x15, x15, :lo12:.Ltmp0
-; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    hint #12
-; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
-; COMPAT-NEXT:    mov x30, x17
-; COMPAT-NEXT:    b callee_stack_args
-;
-; V83A-LABEL: tail_call_fpdiff_a_key:
-; V83A:       // %bb.0:
-; V83A-NEXT:    hint #39
-; V83A-NEXT:    .cfi_negate_ra_state_with_pc
-; V83A-NEXT:  .Ltmp0:
-; V83A-NEXT:    paciasp
-; V83A-NEXT:    orr x29, x29, #0x1000000000000000
-; V83A-NEXT:    sub sp, sp, #48
-; V83A-NEXT:    .cfi_def_cfa_offset 48
-; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; V83A-NEXT:    str x22, [sp, #8]
-; V83A-NEXT:    add x29, sp, #16
-; V83A-NEXT:    .cfi_def_cfa w29, 32
-; V83A-NEXT:    .cfi_offset w30, -24
-; V83A-NEXT:    .cfi_offset w29, -32
-; V83A-NEXT:    mov w8, #9 // =0x9
-; V83A-NEXT:    mov w0, #1 // =0x1
-; V83A-NEXT:    mov w1, #2 // =0x2
-; V83A-NEXT:    str x8, [x29, #16]
-; V83A-NEXT:    mov w2, #3 // =0x3
-; V83A-NEXT:    mov w3, #4 // =0x4
-; V83A-NEXT:    mov w4, #5 // =0x5
-; V83A-NEXT:    mov w5, #6 // =0x6
-; V83A-NEXT:    mov w6, #7 // =0x7
-; V83A-NEXT:    mov w7, #8 // =0x8
-; V83A-NEXT:    .cfi_def_cfa wsp, 48
-; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
-; V83A-NEXT:    add sp, sp, #32
-; V83A-NEXT:    .cfi_def_cfa_offset 16
-; V83A-NEXT:    .cfi_restore w30
-; V83A-NEXT:    .cfi_restore w29
-; V83A-NEXT:    mov x17, x30
-; V83A-NEXT:    add x16, sp, #16
-; V83A-NEXT:    adrp x15, .Ltmp0
-; V83A-NEXT:    add x15, x15, :lo12:.Ltmp0
-; V83A-NEXT:    hint #39
-; V83A-NEXT:    autia1716
-; V83A-NEXT:    .cfi_negate_ra_state_with_pc
-; V83A-NEXT:    mov x30, x17
-; V83A-NEXT:    b callee_stack_args
-;
-; PAUTHLR-LABEL: tail_call_fpdiff_a_key:
-; PAUTHLR:       // %bb.0:
-; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
-; PAUTHLR-NEXT:  .Ltmp0:
-; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
-; PAUTHLR-NEXT:    sub sp, sp, #48
-; PAUTHLR-NEXT:    .cfi_def_cfa_offset 48
-; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; PAUTHLR-NEXT:    str x22, [sp, #8]
-; PAUTHLR-NEXT:    add x29, sp, #16
-; PAUTHLR-NEXT:    .cfi_def_cfa w29, 32
-; PAUTHLR-NEXT:    .cfi_offset w30, -24
-; PAUTHLR-NEXT:    .cfi_offset w29, -32
-; PAUTHLR-NEXT:    mov w8, #9 // =0x9
-; PAUTHLR-NEXT:    mov w0, #1 // =0x1
-; PAUTHLR-NEXT:    mov w1, #2 // =0x2
-; PAUTHLR-NEXT:    str x8, [x29, #16]
-; PAUTHLR-NEXT:    mov w2, #3 // =0x3
-; PAUTHLR-NEXT:    mov w3, #4 // =0x4
-; PAUTHLR-NEXT:    mov w4, #5 // =0x5
-; PAUTHLR-NEXT:    mov w5, #6 // =0x6
-; PAUTHLR-NEXT:    mov w6, #7 // =0x7
-; PAUTHLR-NEXT:    mov w7, #8 // =0x8
-; PAUTHLR-NEXT:    .cfi_def_cfa wsp, 48
-; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
-; PAUTHLR-NEXT:    add sp, sp, #32
-; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
-; PAUTHLR-NEXT:    .cfi_restore w30
-; PAUTHLR-NEXT:    .cfi_restore w29
-; PAUTHLR-NEXT:    mov x17, x30
-; PAUTHLR-NEXT:    add x16, sp, #16
-; PAUTHLR-NEXT:    adrp x15, .Ltmp0
-; PAUTHLR-NEXT:    add x15, x15, :lo12:.Ltmp0
-; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
-; PAUTHLR-NEXT:    autia171615
-; PAUTHLR-NEXT:    mov x30, x17
-; PAUTHLR-NEXT:    b callee_stack_args
-;
-; NOPAUTHLR-LABEL: tail_call_fpdiff_a_key:
-; NOPAUTHLR:       // %bb.0:
-; NOPAUTHLR-NEXT:    paciasp
-; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
-; NOPAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
-; NOPAUTHLR-NEXT:    sub sp, sp, #48
-; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 48
-; NOPAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; NOPAUTHLR-NEXT:    str x22, [sp, #8]
-; NOPAUTHLR-NEXT:    add x29, sp, #16
-; NOPAUTHLR-NEXT:    .cfi_def_cfa w29, 32
-; NOPAUTHLR-NEXT:    .cfi_offset w30, -24
-; NOPAUTHLR-NEXT:    .cfi_offset w29, -32
-; NOPAUTHLR-NEXT:    mov w8, #9 // =0x9
-; NOPAUTHLR-NEXT:    mov w0, #1 // =0x1
-; NOPAUTHLR-NEXT:    mov w1, #2 // =0x2
-; NOPAUTHLR-NEXT:    str x8, [x29, #16]
-; NOPAUTHLR-NEXT:    mov w2, #3 // =0x3
-; NOPAUTHLR-NEXT:    mov w3, #4 // =0x4
-; NOPAUTHLR-NEXT:    mov w4, #5 // =0x5
-; NOPAUTHLR-NEXT:    mov w5, #6 // =0x6
-; NOPAUTHLR-NEXT:    mov w6, #7 // =0x7
-; NOPAUTHLR-NEXT:    mov w7, #8 // =0x8
-; NOPAUTHLR-NEXT:    .cfi_def_cfa wsp, 48
-; NOPAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; NOPAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
-; NOPAUTHLR-NEXT:    add sp, sp, #32
-; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 16
-; NOPAUTHLR-NEXT:    .cfi_restore w30
-; NOPAUTHLR-NEXT:    .cfi_restore w29
-; NOPAUTHLR-NEXT:    mov x17, x30
-; NOPAUTHLR-NEXT:    add x16, sp, #16
-; NOPAUTHLR-NEXT:    autia1716
-; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
-; NOPAUTHLR-NEXT:    mov x30, x17
-; NOPAUTHLR-NEXT:    b callee_stack_args
+; CHECK-LABEL: tail_call_fpdiff_a_key:
+; CHECK:       // %bb.0:
+
+; COMPAT-NEXT:   hint #39
+; COMPAT-NEXT:   .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT: .Ltmp0:
+; COMPAT-NEXT:   hint #25
+
+; V83A-NEXT:     hint #39
+; V83A-NEXT:     .cfi_negate_ra_state_with_pc
+; V83A-NEXT:   .Ltmp0:
+; V83A-NEXT:     paciasp
+
+; V9A-NEXT:      .cfi_negate_ra_state_with_pc
+; V9A-NEXT:    .Ltmp0:
+; V9A-NEXT:      paciasppc
+
+; PAUTH-NEXT:    paciasp
+; PAUTH-NEXT:    .cfi_negate_ra_state
+
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    str x22, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 32
+; CHECK-NEXT:    .cfi_offset w30, -24
+; CHECK-NEXT:    .cfi_offset w29, -32
+; CHECK-NEXT:    mov w8, #9 // =0x9
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w1, #2 // =0x2
+; CHECK-NEXT:    str x8, [x29, #16]
+; CHECK-NEXT:    mov w2, #3 // =0x3
+; CHECK-NEXT:    mov w3, #4 // =0x4
+; CHECK-NEXT:    mov w4, #5 // =0x5
+; CHECK-NEXT:    mov w5, #6 // =0x6
+; CHECK-NEXT:    mov w6, #7 // =0x7
+; CHECK-NEXT:    mov w7, #8 // =0x8
+; CHECK-NEXT:    .cfi_def_cfa wsp, 48
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    mov x17, x30
+; CHECK-NEXT:    add x16, sp, #16
+
+; COMPAT-NEXT:   adrp x15, .Ltmp0
+; COMPAT-NEXT:   add x15, x15, :lo12:.Ltmp0
+; COMPAT-NEXT:   hint #39
+; COMPAT-NEXT:   hint #12
+; COMPAT-NEXT:   .cfi_negate_ra_state_with_pc
+
+; V83A-NEXT:     adrp x15, .Ltmp0
+; V83A-NEXT:     add x15, x15, :lo12:.Ltmp0
+; V83A-NEXT:     hint #39
+; V83A-NEXT:     autia1716
+; V83A-NEXT:     .cfi_negate_ra_state_with_pc
+
+; V9A-NEXT:      adrp x15, .Ltmp0
+; V9A-NEXT:      add x15, x15, :lo12:.Ltmp0
+; V9A-NEXT:      .cfi_negate_ra_state_with_pc
+; V9A-NEXT:      autia171615
+
+; PAUTH-NEXT:    autia1716
+; PAUTH-NEXT:    .cfi_negate_ra_state
+
+; CHECK-NEXT:    mov x30, x17
+; CHECK-NEXT:    b callee_stack_args
   musttail call swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
   ret void
 }
 
-; FPDiff != 0, B-key: callee has stack args that this function doesn't.
+; FPDiff != 0, B-key:     callee has stack args that this function doesn't.
 define swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "sign-return-address-key"="b_key" "frame-pointer"="all" uwtable(async) {
-; COMPAT-LABEL: tail_call_fpdiff_b_key:
-; COMPAT:       // %bb.0:
-; COMPAT-NEXT:    .cfi_b_key_frame
-; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
-; COMPAT-NEXT:  .Ltmp1:
-; COMPAT-NEXT:    hint #27
-; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
-; COMPAT-NEXT:    sub sp, sp, #48
-; COMPAT-NEXT:    .cfi_def_cfa_offset 48
-; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; COMPAT-NEXT:    str x22, [sp, #8]
-; COMPAT-NEXT:    add x29, sp, #16
-; COMPAT-NEXT:    .cfi_def_cfa w29, 32
-; COMPAT-NEXT:    .cfi_offset w30, -24
-; COMPAT-NEXT:    .cfi_offset w29, -32
-; COMPAT-NEXT:    mov w8, #9 // =0x9
-; COMPAT-NEXT:    mov w0, #1 // =0x1
-; COMPAT-NEXT:    mov w1, #2 // =0x2
-; COMPAT-NEXT:    str x8, [x29, #16]
-; COMPAT-NEXT:    mov w2, #3 // =0x3
-; COMPAT-NEXT:    mov w3, #4 // =0x4
-; COMPAT-NEXT:    mov w4, #5 // =0x5
-; COMPAT-NEXT:    mov w5, #6 // =0x6
-; COMPAT-NEXT:    mov w6, #7 // =0x7
-; COMPAT-NEXT:    mov w7, #8 // =0x8
-; COMPAT-NEXT:    .cfi_def_cfa wsp, 48
-; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
-; COMPAT-NEXT:    add sp, sp, #32
-; COMPAT-NEXT:    .cfi_def_cfa_offset 16
-; COMPAT-NEXT:    .cfi_restore w30
-; COMPAT-NEXT:    .cfi_restore w29
-; COMPAT-NEXT:    mov x17, x30
-; COMPAT-NEXT:    add x16, sp, #16
-; COMPAT-NEXT:    adrp x15, .Ltmp1
-; COMPAT-NEXT:    add x15, x15, :lo12:.Ltmp1
-; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    hint #14
-; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
-; COMPAT-NEXT:    mov x30, x17
-; COMPAT-NEXT:    b callee_stack_args
-;
-; V83A-LABEL: tail_call_fpdiff_b_key:
-; V83A:       // %bb.0:
-; V83A-NEXT:    .cfi_b_key_frame
-; V83A-NEXT:    hint #39
-; V83A-NEXT:    .cfi_negate_ra_state_with_pc
-; V83A-NEXT:  .Ltmp1:
-; V83A-NEXT:    pacibsp
-; V83A-NEXT:    orr x29, x29, #0x1000000000000000
-; V83A-NEXT:    sub sp, sp, #48
-; V83A-NEXT:    .cfi_def_cfa_offset 48
-; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; V83A-NEXT:    str x22, [sp, #8]
-; V83A-NEXT:    add x29, sp, #16
-; V83A-NEXT:    .cfi_def_cfa w29, 32
-; V83A-NEXT:    .cfi_offset w30, -24
-; V83A-NEXT:    .cfi_offset w29, -32
-; V83A-NEXT:    mov w8, #9 // =0x9
-; V83A-NEXT:    mov w0, #1 // =0x1
-; V83A-NEXT:    mov w1, #2 // =0x2
-; V83A-NEXT:    str x8, [x29, #16]
-; V83A-NEXT:    mov w2, #3 // =0x3
-; V83A-NEXT:    mov w3, #4 // =0x4
-; V83A-NEXT:    mov w4, #5 // =0x5
-; V83A-NEXT:    mov w5, #6 // =0x6
-; V83A-NEXT:    mov w6, #7 // =0x7
-; V83A-NEXT:    mov w7, #8 // =0x8
-; V83A-NEXT:    .cfi_def_cfa wsp, 48
-; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
-; V83A-NEXT:    add sp, sp, #32
-; V83A-NEXT:    .cfi_def_cfa_offset 16
-; V83A-NEXT:    .cfi_restore w30
-; V83A-NEXT:    .cfi_restore w29
-; V83A-NEXT:    mov x17, x30
-; V83A-NEXT:    add x16, sp, #16
-; V83A-NEXT:    adrp x15, .Ltmp1
-; V83A-NEXT:    add x15, x15, :lo12:.Ltmp1
-; V83A-NEXT:    hint #39
-; V83A-NEXT:    autib1716
-; V83A-NEXT:    .cfi_negate_ra_state_with_pc
-; V83A-NEXT:    mov x30, x17
-; V83A-NEXT:    b callee_stack_args
-;
-; PAUTHLR-LABEL: tail_call_fpdiff_b_key:
-; PAUTHLR:       // %bb.0:
-; PAUTHLR-NEXT:    .cfi_b_key_frame
-; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
-; PAUTHLR-NEXT:  .Ltmp1:
-; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
-; PAUTHLR-NEXT:    sub sp, sp, #48
-; PAUTHLR-NEXT:    .cfi_def_cfa_offset 48
-; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; PAUTHLR-NEXT:    str x22, [sp, #8]
-; PAUTHLR-NEXT:    add x29, sp, #16
-; PAUTHLR-NEXT:    .cfi_def_cfa w29, 32
-; PAUTHLR-NEXT:    .cfi_offset w30, -24
-; PAUTHLR-NEXT:    .cfi_offset w29, -32
-; PAUTHLR-NEXT:    mov w8, #9 // =0x9
-; PAUTHLR-NEXT:    mov w0, #1 // =0x1
-; PAUTHLR-NEXT:    mov w1, #2 // =0x2
-; PAUTHLR-NEXT:    str x8, [x29, #16]
-; PAUTHLR-NEXT:    mov w2, #3 // =0x3
-; PAUTHLR-NEXT:    mov w3, #4 // =0x4
-; PAUTHLR-NEXT:    mov w4, #5 // =0x5
-; PAUTHLR-NEXT:    mov w5, #6 // =0x6
-; PAUTHLR-NEXT:    mov w6, #7 // =0x7
-; PAUTHLR-NEXT:    mov w7, #8 // =0x8
-; PAUTHLR-NEXT:    .cfi_def_cfa wsp, 48
-; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
-; PAUTHLR-NEXT:    add sp, sp, #32
-; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
-; PAUTHLR-NEXT:    .cfi_restore w30
-; PAUTHLR-NEXT:    .cfi_restore w29
-; PAUTHLR-NEXT:    mov x17, x30
-; PAUTHLR-NEXT:    add x16, sp, #16
-; PAUTHLR-NEXT:    adrp x15, .Ltmp1
-; PAUTHLR-NEXT:    add x15, x15, :lo12:.Ltmp1
-; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
-; PAUTHLR-NEXT:    autib171615
-; PAUTHLR-NEXT:    mov x30, x17
-; PAUTHLR-NEXT:    b callee_stack_args
-;
-; NOPAUTHLR-LABEL: tail_call_fpdiff_b_key:
-; NOPAUTHLR:       // %bb.0:
-; NOPAUTHLR-NEXT:    .cfi_b_key_frame
-; NOPAUTHLR-NEXT:    pacibsp
-; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
-; NOPAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
-; NOPAUTHLR-NEXT:    sub sp, sp, #48
-; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 48
-; NOPAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; NOPAUTHLR-NEXT:    str x22, [sp, #8]
-; NOPAUTHLR-NEXT:    add x29, sp, #16
-; NOPAUTHLR-NEXT:    .cfi_def_cfa w29, 32
-; NOPAUTHLR-NEXT:    .cfi_offset w30, -24
-; NOPAUTHLR-NEXT:    .cfi_offset w29, -32
-; NOPAUTHLR-NEXT:    mov w8, #9 // =0x9
-; NOPAUTHLR-NEXT:    mov w0, #1 // =0x1
-; NOPAUTHLR-NEXT:    mov w1, #2 // =0x2
-; NOPAUTHLR-NEXT:    str x8, [x29, #16]
-; NOPAUTHLR-NEXT:    mov w2, #3 // =0x3
-; NOPAUTHLR-NEXT:    mov w3, #4 // =0x4
-; NOPAUTHLR-NEXT:    mov w4, #5 // =0x5
-; NOPAUTHLR-NEXT:    mov w5, #6 // =0x6
-; NOPAUTHLR-NEXT:    mov w6, #7 // =0x7
-; NOPAUTHLR-NEXT:    mov w7, #8 // =0x8
-; NOPAUTHLR-NEXT:    .cfi_def_cfa wsp, 48
-; NOPAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; NOPAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
-; NOPAUTHLR-NEXT:    add sp, sp, #32
-; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 16
-; NOPAUTHLR-NEXT:    .cfi_restore w30
-; NOPAUTHLR-NEXT:    .cfi_restore w29
-; NOPAUTHLR-NEXT:    mov x17, x30
-; NOPAUTHLR-NEXT:    add x16, sp, #16
-; NOPAUTHLR-NEXT:    autib1716
-; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
-; NOPAUTHLR-NEXT:    mov x30, x17
-; NOPAUTHLR-NEXT:    b callee_stack_args
+; CHECK-LABEL: tail_call_fpdiff_b_key:
+; CHECK:         // %bb.0:
+; CHECK-NEXT:    .cfi_b_key_frame
+
+; COMPAT-NEXT:   hint #39
+; COMPAT-NEXT:   .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT: .Ltmp1:
+; COMPAT-NEXT:   hint #27
+
+; V83A-NEXT:     hint #39
+; V83A-NEXT:     .cfi_negate_ra_state_with_pc
+; V83A-NEXT:   .Ltmp1:
+; V83A-NEXT:     pacibsp
+
+; V9A-NEXT:      .cfi_negate_ra_state_with_pc
+; V9A-NEXT:    .Ltmp1:
+; V9A-NEXT:      pacibsppc
+
+; PAUTH-NEXT:    pacibsp
+; PAUTH-NEXT:    .cfi_negate_ra_state
+
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    str x22, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 32
+; CHECK-NEXT:    .cfi_offset w30, -24
+; CHECK-NEXT:    .cfi_offset w29, -32
+; CHECK-NEXT:    mov w8, #9 // =0x9
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w1, #2 // =0x2
+; CHECK-NEXT:    str x8, [x29, #16]
+; CHECK-NEXT:    mov w2, #3 // =0x3
+; CHECK-NEXT:    mov w3, #4 // =0x4
+; CHECK-NEXT:    mov w4, #5 // =0x5
+; CHECK-NEXT:    mov w5, #6 // =0x6
+; CHECK-NEXT:    mov w6, #7 // =0x7
+; CHECK-NEXT:    mov w7, #8 // =0x8
+; CHECK-NEXT:    .cfi_def_cfa wsp, 48
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    mov x17, x30
+; CHECK-NEXT:    add x16, sp, #16
+
+; COMPAT-NEXT:   adrp x15, .Ltmp1
+; COMPAT-NEXT:   add x15, x15, :lo12:.Ltmp1
+; COMPAT-NEXT:   hint #39
+; COMPAT-NEXT:   hint #14
+; COMPAT-NEXT:   .cfi_negate_ra_state_with_pc
+
+; V83A-NEXT:     adrp x15, .Ltmp1
+; V83A-NEXT:     add x15, x15, :lo12:.Ltmp1
+; V83A-NEXT:     hint #39
+; V83A-NEXT:     autib1716
+; V83A-NEXT:     .cfi_negate_ra_state_with_pc
+
+; V9A-NEXT:      adrp x15, .Ltmp1
+; V9A-NEXT:      add x15, x15, :lo12:.Ltmp1
+; V9A-NEXT:      .cfi_negate_ra_state_with_pc
+; V9A-NEXT:      autib171615
+
+; PAUTH-NEXT:    autib1716
+; PAUTH-NEXT:    .cfi_negate_ra_state
+
+; CHECK-NEXT:    mov x30, x17
+; CHECK-NEXT:    b callee_stack_args
   musttail call swifttailcc void @callee_stack_args(ptr swiftasync %ctx, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
   ret void
 }
 
-; FPDiff == 0, A-key: callee has same calling convention, no extra stack args.
+; FPDiff == 0, A-key:     callee has same calling convention, no extra stack args.
 define swifttailcc void @tail_call_no_fpdiff_a_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "frame-pointer"="all" uwtable(async) {
-; COMPAT-LABEL: tail_call_no_fpdiff_a_key:
-; COMPAT:       // %bb.0:
-; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
-; COMPAT-NEXT:  .Ltmp2:
-; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
-; COMPAT-NEXT:    sub sp, sp, #32
-; COMPAT-NEXT:    .cfi_def_cfa_offset 32
-; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; COMPAT-NEXT:    str x22, [sp, #8]
-; COMPAT-NEXT:    add x29, sp, #16
-; COMPAT-NEXT:    .cfi_def_cfa w29, 16
-; COMPAT-NEXT:    .cfi_offset w30, -8
-; COMPAT-NEXT:    .cfi_offset w29, -16
-; COMPAT-NEXT:    .cfi_def_cfa wsp, 32
-; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
-; COMPAT-NEXT:    add sp, sp, #32
-; COMPAT-NEXT:    .cfi_def_cfa_offset 0
-; COMPAT-NEXT:    .cfi_restore w30
-; COMPAT-NEXT:    .cfi_restore w29
-; COMPAT-NEXT:    adrp x16, .Ltmp2
-; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp2
-; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    hint #29
-; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
-; COMPAT-NEXT:    b callee_no_stack_args
-;
-; V83A-LABEL: tail_call_no_fpdiff_a_key:
-; V83A:       // %bb.0:
-; V83A-NEXT:    hint #39
-; V83A-NEXT:    .cfi_negate_ra_state_with_pc
-; V83A-NEXT:  .Ltmp2:
-; V83A-NEXT:    paciasp
-; V83A-NEXT:    orr x29, x29, #0x1000000000000000
-; V83A-NEXT:    sub sp, sp, #32
-; V83A-NEXT:    .cfi_def_cfa_offset 32
-; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; V83A-NEXT:    str x22, [sp, #8]
-; V83A-NEXT:    add x29, sp, #16
-; V83A-NEXT:    .cfi_def_cfa w29, 16
-; V83A-NEXT:    .cfi_offset w30, -8
-; V83A-NEXT:    .cfi_offset w29, -16
-; V83A-NEXT:    .cfi_def_cfa wsp, 32
-; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
-; V83A-NEXT:    add sp, sp, #32
-; V83A-NEXT:    .cfi_def_cfa_offset 0
-; V83A-NEXT:    .cfi_restore w30
-; V83A-NEXT:    .cfi_restore w29
-; V83A-NEXT:    adrp x16, .Ltmp2
-; V83A-NEXT:    add x16, x16, :lo12:.Ltmp2
-; V83A-NEXT:    hint #39
-; V83A-NEXT:    autiasp
-; V83A-NEXT:    .cfi_negate_ra_state_with_pc
-; V83A-NEXT:    b callee_no_stack_args
-;
-; PAUTHLR-LABEL: tail_call_no_fpdiff_a_key:
-; PAUTHLR:       // %bb.0:
-; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
-; PAUTHLR-NEXT:  .Ltmp2:
-; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
-; PAUTHLR-NEXT:    sub sp, sp, #32
-; PAUTHLR-NEXT:    .cfi_def_cfa_offset 32
-; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; PAUTHLR-NEXT:    str x22, [sp, #8]
-; PAUTHLR-NEXT:    add x29, sp, #16
-; PAUTHLR-NEXT:    .cfi_def_cfa w29, 16
-; PAUTHLR-NEXT:    .cfi_offset w30, -8
-; PAUTHLR-NEXT:    .cfi_offset w29, -16
-; PAUTHLR-NEXT:    .cfi_def_cfa wsp, 32
-; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
-; PAUTHLR-NEXT:    add sp, sp, #32
-; PAUTHLR-NEXT:    .cfi_def_cfa_offset 0
-; PAUTHLR-NEXT:    .cfi_restore w30
-; PAUTHLR-NEXT:    .cfi_restore w29
-; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
-; PAUTHLR-NEXT:    autiasppc .Ltmp2
-; PAUTHLR-NEXT:    b callee_no_stack_args
-;
-; NOPAUTHLR-LABEL: tail_call_no_fpdiff_a_key:
-; NOPAUTHLR:       // %bb.0:
-; NOPAUTHLR-NEXT:    paciasp
-; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
-; NOPAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
-; NOPAUTHLR-NEXT:    sub sp, sp, #32
-; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 32
-; NOPAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; NOPAUTHLR-NEXT:    str x22, [sp, #8]
-; NOPAUTHLR-NEXT:    add x29, sp, #16
-; NOPAUTHLR-NEXT:    .cfi_def_cfa w29, 16
-; NOPAUTHLR-NEXT:    .cfi_offset w30, -8
-; NOPAUTHLR-NEXT:    .cfi_offset w29, -16
-; NOPAUTHLR-NEXT:    .cfi_def_cfa wsp, 32
-; NOPAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; NOPAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
-; NOPAUTHLR-NEXT:    add sp, sp, #32
-; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 0
-; NOPAUTHLR-NEXT:    .cfi_restore w30
-; NOPAUTHLR-NEXT:    .cfi_restore w29
-; NOPAUTHLR-NEXT:    autiasp
-; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
-; NOPAUTHLR-NEXT:    b callee_no_stack_args
+; CHECK-LABEL: tail_call_no_fpdiff_a_key:
+; CHECK:       // %bb.0:
+
+; COMPAT-NEXT:   hint #39
+; COMPAT-NEXT:   .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT: .Ltmp2:
+; COMPAT-NEXT:   hint #25
+
+; V83A-NEXT:     hint #39
+; V83A-NEXT:     .cfi_negate_ra_state_with_pc
+; V83A-NEXT:   .Ltmp2:
+; V83A-NEXT:     paciasp
+
+; V9A-NEXT:      .cfi_negate_ra_state_with_pc
+; V9A-NEXT:    .Ltmp2:
+; V9A-NEXT:      paciasppc
+
+; PAUTH-NEXT:    paciasp
+; PAUTH-NEXT:    .cfi_negate_ra_state
+
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    str x22, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_def_cfa wsp, 32
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+
+; COMPAT-NEXT:   adrp x16, .Ltmp2
+; COMPAT-NEXT:   add x16, x16, :lo12:.Ltmp2
+; COMPAT-NEXT:   hint #39
+; COMPAT-NEXT:   hint #29
+; COMPAT-NEXT:   .cfi_negate_ra_state_with_pc
+
+; V83A-NEXT:     adrp x16, .Ltmp2
+; V83A-NEXT:     add x16, x16, :lo12:.Ltmp2
+; V83A-NEXT:     hint #39
+; V83A-NEXT:     autiasp
+; V83A-NEXT:     .cfi_negate_ra_state_with_pc
+
+; V9A-NEXT:      .cfi_negate_ra_state_with_pc
+; V9A-NEXT:      autiasppc .Ltmp2
+
+; PAUTH-NEXT:    autiasp
+; PAUTH-NEXT:    .cfi_negate_ra_state
+
+; CHECK-NEXT:    b callee_no_stack_args
   musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
   ret void
 }
 
-; FPDiff == 0, B-key: callee has same calling convention, no extra stack args.
+; FPDiff == 0, B-key:     callee has same calling convention, no extra stack args.
 define swifttailcc void @tail_call_no_fpdiff_b_key(ptr swiftasync %ctx) "branch-protection-pauth-lr" "sign-return-address"="all" "sign-return-address-key"="b_key" "frame-pointer"="all" uwtable(async) {
-; COMPAT-LABEL: tail_call_no_fpdiff_b_key:
-; COMPAT:       // %bb.0:
-; COMPAT-NEXT:    .cfi_b_key_frame
-; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
-; COMPAT-NEXT:  .Ltmp3:
-; COMPAT-NEXT:    hint #27
-; COMPAT-NEXT:    orr x29, x29, #0x1000000000000000
-; COMPAT-NEXT:    sub sp, sp, #32
-; COMPAT-NEXT:    .cfi_def_cfa_offset 32
-; COMPAT-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; COMPAT-NEXT:    str x22, [sp, #8]
-; COMPAT-NEXT:    add x29, sp, #16
-; COMPAT-NEXT:    .cfi_def_cfa w29, 16
-; COMPAT-NEXT:    .cfi_offset w30, -8
-; COMPAT-NEXT:    .cfi_offset w29, -16
-; COMPAT-NEXT:    .cfi_def_cfa wsp, 32
-; COMPAT-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; COMPAT-NEXT:    and x29, x29, #0xefffffffffffffff
-; COMPAT-NEXT:    add sp, sp, #32
-; COMPAT-NEXT:    .cfi_def_cfa_offset 0
-; COMPAT-NEXT:    .cfi_restore w30
-; COMPAT-NEXT:    .cfi_restore w29
-; COMPAT-NEXT:    adrp x16, .Ltmp3
-; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp3
-; COMPAT-NEXT:    hint #39
-; COMPAT-NEXT:    hint #31
-; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
-; COMPAT-NEXT:    b callee_no_stack_args
-;
-; V83A-LABEL: tail_call_no_fpdiff_b_key:
-; V83A:       // %bb.0:
-; V83A-NEXT:    .cfi_b_key_frame
-; V83A-NEXT:    hint #39
-; V83A-NEXT:    .cfi_negate_ra_state_with_pc
-; V83A-NEXT:  .Ltmp3:
-; V83A-NEXT:    pacibsp
-; V83A-NEXT:    orr x29, x29, #0x1000000000000000
-; V83A-NEXT:    sub sp, sp, #32
-; V83A-NEXT:    .cfi_def_cfa_offset 32
-; V83A-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; V83A-NEXT:    str x22, [sp, #8]
-; V83A-NEXT:    add x29, sp, #16
-; V83A-NEXT:    .cfi_def_cfa w29, 16
-; V83A-NEXT:    .cfi_offset w30, -8
-; V83A-NEXT:    .cfi_offset w29, -16
-; V83A-NEXT:    .cfi_def_cfa wsp, 32
-; V83A-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; V83A-NEXT:    and x29, x29, #0xefffffffffffffff
-; V83A-NEXT:    add sp, sp, #32
-; V83A-NEXT:    .cfi_def_cfa_offset 0
-; V83A-NEXT:    .cfi_restore w30
-; V83A-NEXT:    .cfi_restore w29
-; V83A-NEXT:    adrp x16, .Ltmp3
-; V83A-NEXT:    add x16, x16, :lo12:.Ltmp3
-; V83A-NEXT:    hint #39
-; V83A-NEXT:    autibsp
-; V83A-NEXT:    .cfi_negate_ra_state_with_pc
-; V83A-NEXT:    b callee_no_stack_args
-;
-; PAUTHLR-LABEL: tail_call_no_fpdiff_b_key:
-; PAUTHLR:       // %bb.0:
-; PAUTHLR-NEXT:    .cfi_b_key_frame
-; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
-; PAUTHLR-NEXT:  .Ltmp3:
-; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
-; PAUTHLR-NEXT:    sub sp, sp, #32
-; PAUTHLR-NEXT:    .cfi_def_cfa_offset 32
-; PAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; PAUTHLR-NEXT:    str x22, [sp, #8]
-; PAUTHLR-NEXT:    add x29, sp, #16
-; PAUTHLR-NEXT:    .cfi_def_cfa w29, 16
-; PAUTHLR-NEXT:    .cfi_offset w30, -8
-; PAUTHLR-NEXT:    .cfi_offset w29, -16
-; PAUTHLR-NEXT:    .cfi_def_cfa wsp, 32
-; PAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; PAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
-; PAUTHLR-NEXT:    add sp, sp, #32
-; PAUTHLR-NEXT:    .cfi_def_cfa_offset 0
-; PAUTHLR-NEXT:    .cfi_restore w30
-; PAUTHLR-NEXT:    .cfi_restore w29
-; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
-; PAUTHLR-NEXT:    autibsppc .Ltmp3
-; PAUTHLR-NEXT:    b callee_no_stack_args
-;
-; NOPAUTHLR-LABEL: tail_call_no_fpdiff_b_key:
-; NOPAUTHLR:       // %bb.0:
-; NOPAUTHLR-NEXT:    .cfi_b_key_frame
-; NOPAUTHLR-NEXT:    pacibsp
-; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
-; NOPAUTHLR-NEXT:    orr x29, x29, #0x1000000000000000
-; NOPAUTHLR-NEXT:    sub sp, sp, #32
-; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 32
-; NOPAUTHLR-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
-; NOPAUTHLR-NEXT:    str x22, [sp, #8]
-; NOPAUTHLR-NEXT:    add x29, sp, #16
-; NOPAUTHLR-NEXT:    .cfi_def_cfa w29, 16
-; NOPAUTHLR-NEXT:    .cfi_offset w30, -8
-; NOPAUTHLR-NEXT:    .cfi_offset w29, -16
-; NOPAUTHLR-NEXT:    .cfi_def_cfa wsp, 32
-; NOPAUTHLR-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; NOPAUTHLR-NEXT:    and x29, x29, #0xefffffffffffffff
-; NOPAUTHLR-NEXT:    add sp, sp, #32
-; NOPAUTHLR-NEXT:    .cfi_def_cfa_offset 0
-; NOPAUTHLR-NEXT:    .cfi_restore w30
-; NOPAUTHLR-NEXT:    .cfi_restore w29
-; NOPAUTHLR-NEXT:    autibsp
-; NOPAUTHLR-NEXT:    .cfi_negate_ra_state
-; NOPAUTHLR-NEXT:    b callee_no_stack_args
+; CHECK-LABEL: tail_call_no_fpdiff_b_key:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    .cfi_b_key_frame
+
+; COMPAT-NEXT:   hint #39
+; COMPAT-NEXT:   .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT: .Ltmp3:
+; COMPAT-NEXT:   hint #27
+
+; V83A-NEXT:     hint #39
+; V83A-NEXT:     .cfi_negate_ra_state_with_pc
+; V83A-NEXT:   .Ltmp3:
+; V83A-NEXT:     pacibsp
+
+; V9A-NEXT:      .cfi_negate_ra_state_with_pc
+; V9A-NEXT:    .Ltmp3:
+; V9A-NEXT:      pacibsppc
+
+; PAUTH-NEXT:    pacibsp
+; PAUTH-NEXT:    .cfi_negate_ra_state
+
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    str x22, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_def_cfa wsp, 32
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+
+; COMPAT-NEXT:   adrp x16, .Ltmp3
+; COMPAT-NEXT:   add x16, x16, :lo12:.Ltmp3
+; COMPAT-NEXT:   hint #39
+; COMPAT-NEXT:   hint #31
+; COMPAT-NEXT:   .cfi_negate_ra_state_with_pc
+
+; V83A-NEXT:     adrp x16, .Ltmp3
+; V83A-NEXT:     add x16, x16, :lo12:.Ltmp3
+; V83A-NEXT:     hint #39
+; V83A-NEXT:     autibsp
+; V83A-NEXT:     .cfi_negate_ra_state_with_pc
+
+; V9A-NEXT:      .cfi_negate_ra_state_with_pc
+; V9A-NEXT:      autibsppc .Ltmp3
+
+; PAUTH-NEXT:    autibsp
+; PAUTH-NEXT:    .cfi_negate_ra_state
+
+; CHECK-NEXT:    b callee_no_stack_args
   musttail call swifttailcc void @callee_no_stack_args(ptr swiftasync %ctx)
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}



More information about the llvm-commits mailing list