[llvm-branch-commits] [llvm] [AArch64][PAC] Lower authenticated calls with ptrauth bundles. (PR #85736)

Ahmed Bougacha via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu May 30 16:33:06 PDT 2024


https://github.com/ahmedbougacha updated https://github.com/llvm/llvm-project/pull/85736

>From 05a7f0e6fde908fc53d8aee46feb36c896968247 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed at bougacha.org>
Date: Mon, 27 Sep 2021 08:00:00 -0700
Subject: [PATCH 1/5] [AArch64] Adopt x8+ allocation order for GPR64noip.

73078ecd381 added GPR64noip for hwasan pseudos.
Give it an allocation order that prefers allocating from x8 and up,
to match GPR64: this allows for easier regalloc, as x0-x7 are
likely to be used for parameter passing.
---
 llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 80d0f9c57f4b3..dfaa67dd1959d 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -234,7 +234,10 @@ def tcGPRnotx16 : RegisterClass<"AArch64", [i64], 64, (sub tcGPR64, X16)>;
 // Register set that excludes registers that are reserved for procedure calls.
 // This is used for pseudo-instructions that are actually implemented using a
 // procedure call.
-def GPR64noip : RegisterClass<"AArch64", [i64], 64, (sub GPR64, X16, X17, LR)>;
+def GPR64noip : RegisterClass<"AArch64", [i64], 64, (sub GPR64, X16, X17, LR)> {
+  let AltOrders = [(rotl GPR64noip, 8)]; // Rotated order: prefer x8 and up.
+  let AltOrderSelect = [{ return 1; }]; // Always pick the rotated order.
+}
 
 // GPR register classes for post increment amount of vector load/store that
 // has alternate printing when Rm=31 and prints a constant immediate value

>From 93755dab8f22d0ff3531f0a040db29e7c11d3e43 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed at bougacha.org>
Date: Wed, 24 Jan 2024 15:03:49 -0800
Subject: [PATCH 2/5] [AArch64][PAC] Lower authenticated calls with ptrauth
 bundles.

This adds codegen support for the "ptrauth" operand bundles, which can
be used to augment indirect calls with the equivalent of an
`@llvm.ptrauth.auth` intrinsic call on the call target (possibly
preceded by an `@llvm.ptrauth.blend` on the auth discriminator if
applicable.)

This allows the generation of combined authenticating calls
on AArch64 (using the BLRA* PAuth instructions), while preventing
the raw, just-authenticated function pointer from being
exposed to attackers.

This is done by threading a PtrAuthInfo descriptor through
the call lowering infrastructure.

Note that this also applies to the other forms of indirect calls,
notably invokes, rvmarker, and tail calls.  Tail calls in particular
bring some additional complexity, with the intersecting register
constraints of BTI and PAC discriminator computation.
---
 .../llvm/CodeGen/GlobalISel/CallLowering.h    |   8 +
 llvm/include/llvm/CodeGen/TargetLowering.h    |  18 ++
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  |   2 +
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |  16 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  51 ++++-
 .../SelectionDAG/SelectionDAGBuilder.h        |   6 +-
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 129 ++++++++++++
 .../AArch64/AArch64ExpandPseudoInsts.cpp      |  43 +++-
 .../Target/AArch64/AArch64ISelLowering.cpp    | 103 ++++++---
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  12 ++
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |   2 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  79 +++++++
 .../AArch64/GISel/AArch64CallLowering.cpp     |  88 ++++++--
 .../AArch64/GISel/AArch64GlobalISelUtils.cpp  |  29 +++
 .../AArch64/GISel/AArch64GlobalISelUtils.h    |   6 +
 .../AArch64/GlobalISel/ptrauth-invoke.ll      | 183 ++++++++++++++++
 ...ranch-target-enforcement-indirect-calls.ll |   4 +-
 llvm/test/CodeGen/AArch64/ptrauth-bti-call.ll | 105 ++++++++++
 .../CodeGen/AArch64/ptrauth-call-rv-marker.ll | 154 ++++++++++++++
 llvm/test/CodeGen/AArch64/ptrauth-call.ll     | 195 ++++++++++++++++++
 llvm/test/CodeGen/AArch64/ptrauth-invoke.ll   | 189 +++++++++++++++++
 21 files changed, 1364 insertions(+), 58 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-invoke.ll
 create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-bti-call.ll
 create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-call-rv-marker.ll
 create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-call.ll
 create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-invoke.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 4c187a3068d82..fb298898304eb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -99,6 +99,11 @@ class CallLowering {
     ArgInfo() = default;
   };
 
+  struct PointerAuthInfo {
+    Register Discriminator; ///< Register holding the discriminator value.
+    uint64_t Key; ///< Constant key operand of the "ptrauth" bundle.
+  };
+
   struct CallLoweringInfo {
     /// Calling convention to be used for the call.
     CallingConv::ID CallConv = CallingConv::C;
@@ -125,6 +130,8 @@ class CallLowering {
 
     MDNode *KnownCallees = nullptr;
 
+    std::optional<PointerAuthInfo> PAI;
+
     /// True if the call must be tail call optimized.
     bool IsMustTailCall = false;
 
@@ -587,6 +594,7 @@ class CallLowering {
   bool lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &Call,
                  ArrayRef<Register> ResRegs,
                  ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg,
+                 std::optional<PointerAuthInfo> PAI,
                  Register ConvergenceCtrlToken,
                  std::function<unsigned()> GetCalleeReg) const;
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7ed08cfa8a202..01c66f3f49557 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4361,6 +4361,9 @@ class TargetLowering : public TargetLoweringBase {
   /// Return true if the target supports kcfi operand bundles.
   virtual bool supportKCFIBundles() const { return false; }
 
+  /// Return true if the target supports ptrauth operand bundles.
+  virtual bool supportPtrAuthBundles() const { return false; }
+
   /// Perform necessary initialization to handle a subset of CSRs explicitly
   /// via copies. This function is called at the beginning of instruction
   /// selection.
@@ -4472,6 +4475,14 @@ class TargetLowering : public TargetLoweringBase {
     llvm_unreachable("Not Implemented");
   }
 
+  /// This structure contains the information necessary for lowering
+  /// pointer-authenticating indirect calls.  It is equivalent to the "ptrauth"
+  /// operand bundle found on the call instruction, if any.
+  struct PtrAuthInfo {
+    uint64_t Key; ///< Constant key operand of the bundle.
+    SDValue Discriminator; ///< Lowered discriminator operand of the bundle.
+  };
+
   /// This structure contains all information that is necessary for lowering
   /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder
   /// needs to lower a call, and targets will see this struct in their LowerCall
@@ -4511,6 +4522,8 @@ class TargetLowering : public TargetLoweringBase {
     const ConstantInt *CFIType = nullptr;
     SDValue ConvergenceControlToken;
 
+    std::optional<PtrAuthInfo> PAI;
+
     CallLoweringInfo(SelectionDAG &DAG)
         : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
           DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
@@ -4633,6 +4646,11 @@ class TargetLowering : public TargetLoweringBase {
       return *this;
     }
 
+    CallLoweringInfo &setPtrAuth(PtrAuthInfo Value) {
+      PAI.emplace(Value); // Record the ptrauth bundle data for this call.
+      return *this;
+    }
+
     CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
       IsPostTypeLegalization = Value;
       return *this;
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 363fad53b76c3..740a00d8afdd4 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -92,6 +92,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
                              ArrayRef<Register> ResRegs,
                              ArrayRef<ArrayRef<Register>> ArgRegs,
                              Register SwiftErrorVReg,
+                             std::optional<PointerAuthInfo> PAI,
                              Register ConvergenceCtrlToken,
                              std::function<unsigned()> GetCalleeReg) const {
   CallLoweringInfo Info;
@@ -188,6 +189,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
   Info.CallConv = CallConv;
   Info.SwiftErrorVReg = SwiftErrorVReg;
+  Info.PAI = PAI;
   Info.ConvergenceCtrlToken = ConvergenceCtrlToken;
   Info.IsMustTailCall = CB.isMustTailCall();
   Info.IsTailCall = CanBeTailCalled;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 77ee5e645288b..299f16dbddf12 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2642,6 +2642,20 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
     }
   }
 
+  std::optional<CallLowering::PointerAuthInfo> PAI;
+  if (CB.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
+    // Functions should never be ptrauth-called directly.
+    assert(!CB.getCalledFunction() && "invalid direct ptrauth call");
+
+    auto PAB = CB.getOperandBundle("ptrauth");
+    Value *Key = PAB->Inputs[0];
+    Value *Discriminator = PAB->Inputs[1];
+
+    Register DiscReg = getOrCreateVReg(*Discriminator);
+    PAI = CallLowering::PointerAuthInfo{DiscReg,
+                                        cast<ConstantInt>(Key)->getZExtValue()};
+  }
+
   Register ConvergenceCtrlToken = 0;
   if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
     const auto &Token = *Bundle->Inputs[0].get();
@@ -2652,7 +2666,7 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
   // optimize into tail calls. Instead, we defer that to selection where a final
   // scan is done to check if any instructions are calls.
   bool Success = CLI->lowerCall(
-      MIRBuilder, CB, Res, Args, SwiftErrorVReg, ConvergenceCtrlToken,
+      MIRBuilder, CB, Res, Args, SwiftErrorVReg, PAI, ConvergenceCtrlToken,
       [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
 
   // Check if we just inserted a tail call.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f47aea29625f6..1de98e5559ee8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3307,12 +3307,12 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
   const BasicBlock *EHPadBB = I.getSuccessor(1);
   MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB];
 
-  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+  // Deopt and ptrauth bundles are lowered in helper functions, and we don't
   // have to do anything here to lower funclet bundles.
   assert(!I.hasOperandBundlesOtherThan(
              {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
               LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
-              LLVMContext::OB_cfguardtarget,
+              LLVMContext::OB_cfguardtarget, LLVMContext::OB_ptrauth,
               LLVMContext::OB_clang_arc_attachedcall}) &&
          "Cannot lower invokes with arbitrary operand bundles yet!");
 
@@ -3363,6 +3363,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
     // intrinsic, and right now there are no plans to support other intrinsics
     // with deopt state.
     LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
+  } else if (I.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
+    LowerCallSiteWithPtrAuthBundle(cast<CallBase>(I), EHPadBB);
   } else {
     LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
   }
@@ -8531,9 +8533,9 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
 }
 
 void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
-                                      bool isTailCall,
-                                      bool isMustTailCall,
-                                      const BasicBlock *EHPadBB) {
+                                      bool isTailCall, bool isMustTailCall,
+                                      const BasicBlock *EHPadBB,
+                                      const TargetLowering::PtrAuthInfo *PAI) {
   auto &DL = DAG.getDataLayout();
   FunctionType *FTy = CB.getFunctionType();
   Type *RetTy = CB.getType();
@@ -8640,6 +8642,15 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
           CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
       .setCFIType(CFIType)
       .setConvergenceControlToken(ConvControlToken);
+
+  // Set the pointer authentication info if we have it.
+  if (PAI) {
+    if (!TLI.supportPtrAuthBundles())
+      report_fatal_error(
+          "This target doesn't support calls with ptrauth operand bundles.");
+    CLI.setPtrAuth(*PAI);
+  }
+
   std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
 
   if (Result.first.getNode()) {
@@ -9185,6 +9196,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
     }
   }
 
+  if (I.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
+    LowerCallSiteWithPtrAuthBundle(cast<CallBase>(I), /*EHPadBB=*/nullptr);
+    return;
+  }
+
   // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
   // have to do anything here to lower funclet bundles.
   // CFGuardTarget bundles are lowered in LowerCallTo.
@@ -9206,6 +9222,31 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
     LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
 }
 
+void SelectionDAGBuilder::LowerCallSiteWithPtrAuthBundle(
+    const CallBase &CB, const BasicBlock *EHPadBB) {
+  // Lower a call or invoke that carries a "ptrauth" operand bundle.
+  auto PAB = CB.getOperandBundle(LLVMContext::OB_ptrauth);
+  auto *CalleeV = CB.getCalledOperand();
+
+  // Gather the call ptrauth data from the operand bundle:
+  //   [ i32 <key>, i64 <discriminator> ]
+  auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
+  Value *Discriminator = PAB->Inputs[1];
+
+  assert(Key->getType()->isIntegerTy(32) && "Invalid ptrauth key");
+  assert(Discriminator->getType()->isIntegerTy(64) &&
+         "Invalid ptrauth discriminator");
+
+  // Functions should never be ptrauth-called directly.
+  assert(!isa<Function>(CalleeV) && "invalid direct ptrauth call");
+
+  // Do an authenticated indirect call; LowerCallTo copies PAI into the CLI.
+  TargetLowering::PtrAuthInfo PAI = {Key->getZExtValue(),
+                                     getValue(Discriminator)};
+  LowerCallTo(CB, getValue(CalleeV), CB.isTailCall(), CB.isMustTailCall(),
+              EHPadBB, &PAI);
+}
+
 namespace {
 
 /// AsmOperandInfo - This contains information for each constraint that we are
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 211e1653de560..c6aa4f1c5c3d8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -406,7 +406,8 @@ class SelectionDAGBuilder {
   void CopyToExportRegsIfNeeded(const Value *V);
   void ExportFromCurrentBlock(const Value *V);
   void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall,
-                   bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr);
+                   bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr,
+                   const TargetLowering::PtrAuthInfo *PAI = nullptr);
 
   // Lower range metadata from 0 to N to assert zext to an integer of nearest
   // floor power of two.
@@ -490,6 +491,9 @@ class SelectionDAGBuilder {
                                         bool VarArgDisallowed,
                                         bool ForceVoidReturnTy);
 
+  void LowerCallSiteWithPtrAuthBundle(const CallBase &CB,
+                                      const BasicBlock *EHPadBB);
+
   /// Returns the type of FrameIndex and TargetFrameIndex nodes.
   MVT getFrameIndexTy() {
     return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout());
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index ee39c6355c298..22af1f22f1f25 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -125,6 +125,12 @@ class AArch64AsmPrinter : public AsmPrinter {
 
   void emitSled(const MachineInstr &MI, SledKind Kind);
 
+  // Emit the sequence for BLRA (authenticate + branch).
+  void emitPtrauthBranch(const MachineInstr *MI);
+  // Emit the sequence to compute a discriminator into x17, or reuse AddrDisc.
+  unsigned emitPtrauthDiscriminator(uint16_t Disc, unsigned AddrDisc,
+                                    unsigned &InstsEmitted);
+
   /// tblgen'erated driver function for lowering simple MI->MC
   /// pseudo instructions.
   bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
@@ -1504,6 +1510,77 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
   }
 }
 
+unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
+                                                     unsigned AddrDisc,
+                                                     unsigned &InstsEmitted) {
+  auto EmitAndCount = [&](const MCInst &Inst) {
+    EmitToStreamer(*OutStreamer, Inst);
+    ++InstsEmitted;
+  };
+
+  // Pseudos use NoRegister for "no address discriminator"; encode it as XZR.
+  if (AddrDisc == AArch64::NoRegister)
+    AddrDisc = AArch64::XZR;
+
+  // With no constant discriminator there is nothing to blend: reuse AddrDisc.
+  if (Disc == 0)
+    return AddrDisc;
+  const unsigned BlendReg = AArch64::X17;
+  if (AddrDisc != AArch64::XZR) {
+    // Have both: copy AddrDisc into x17, then MOVK the constant on top.
+    EmitAndCount(MCInstBuilder(AArch64::ORRXrs)
+                     .addReg(BlendReg)
+                     .addReg(AArch64::XZR)
+                     .addReg(AddrDisc)
+                     .addImm(0));
+    EmitAndCount(MCInstBuilder(AArch64::MOVKXi)
+                     .addReg(BlendReg)
+                     .addReg(BlendReg)
+                     .addImm(Disc)
+                     .addImm(/*shift=*/48));
+  } else {
+    // Constant only: materialize it directly in x17.
+    EmitAndCount(MCInstBuilder(AArch64::MOVZXi)
+                     .addReg(BlendReg)
+                     .addImm(Disc)
+                     .addImm(/*shift=*/0));
+  }
+  return BlendReg;
+}
+
+void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
+  unsigned NumEmitted = 0;
+
+  // Operands: branch target, key, integer discriminator, address disc.
+  const unsigned Target = MI->getOperand(0).getReg();
+  const auto Key = static_cast<AArch64PACKey::ID>(MI->getOperand(1).getImm());
+  const uint64_t IntDisc = MI->getOperand(2).getImm();
+  const unsigned AddrDisc = MI->getOperand(3).getReg();
+
+  // Get the discriminator register; XZR selects the zero-discriminator forms.
+  assert(isUInt<16>(IntDisc));
+  unsigned DiscReg = emitPtrauthDiscriminator(IntDisc, AddrDisc, NumEmitted);
+  const bool ZeroDisc = DiscReg == AArch64::XZR;
+
+  assert((Key == AArch64PACKey::IA || Key == AArch64PACKey::IB) &&
+         "Invalid auth call key");
+
+  // Choose the BLRA* opcode from the key and the presence of a discriminator.
+  const bool IsKeyA = Key == AArch64PACKey::IA;
+  const unsigned Opc = ZeroDisc ? (IsKeyA ? AArch64::BLRAAZ : AArch64::BLRABZ)
+                                : (IsKeyA ? AArch64::BLRAA : AArch64::BLRAB);
+
+  MCInst Call;
+  Call.setOpcode(Opc);
+  Call.addOperand(MCOperand::createReg(Target));
+  if (!ZeroDisc)
+    Call.addOperand(MCOperand::createReg(DiscReg));
+  EmitToStreamer(*OutStreamer, Call);
+  ++NumEmitted;
+
+  assert(STI->getInstrInfo()->getInstSizeInBytes(*MI) >= NumEmitted * 4);
+}
+
 // Simple pseudo-instructions have their lowering (with expansion to real
 // instructions) auto-generated.
 #include "AArch64GenMCPseudoLowering.inc"
@@ -1639,9 +1716,61 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
     return;
   }
 
+  case AArch64::BLRA:
+    emitPtrauthBranch(MI);
+    return;
+
   // Tail calls use pseudo instructions so they have the proper code-gen
   // attributes (isCall, isReturn, etc.). We lower them to the real
   // instruction here.
+  case AArch64::AUTH_TCRETURN:
+  case AArch64::AUTH_TCRETURN_BTI: {
+    const uint64_t Key = MI->getOperand(2).getImm();
+    assert(Key < 2 && "Unknown key kind for authenticating tail-call return");
+    const uint64_t Disc = MI->getOperand(3).getImm();
+    Register AddrDisc = MI->getOperand(4).getReg();
+
+    Register ScratchReg = MI->getOperand(0).getReg() == AArch64::X16
+                              ? AArch64::X17
+                              : AArch64::X16;
+
+    unsigned DiscReg = AddrDisc;
+    if (Disc) {
+      assert(isUInt<16>(Disc) && "Integer discriminator is too wide");
+
+      if (AddrDisc != AArch64::NoRegister) {
+        EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ORRXrs)
+                                         .addReg(ScratchReg)
+                                         .addReg(AArch64::XZR)
+                                         .addReg(AddrDisc)
+                                         .addImm(0));
+        EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::MOVKXi)
+                                         .addReg(ScratchReg)
+                                         .addReg(ScratchReg)
+                                         .addImm(Disc)
+                                         .addImm(/*shift=*/48));
+      } else {
+        EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::MOVZXi)
+                                         .addReg(ScratchReg)
+                                         .addImm(Disc)
+                                         .addImm(/*shift=*/0));
+      }
+      DiscReg = ScratchReg;
+    }
+
+    const bool isZero = DiscReg == AArch64::NoRegister;
+    const unsigned Opcodes[2][2] = {{AArch64::BRAA, AArch64::BRAAZ},
+                                    {AArch64::BRAB, AArch64::BRABZ}};
+
+    MCInst TmpInst;
+    TmpInst.setOpcode(Opcodes[Key][isZero]);
+    TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+    if (!isZero)
+      TmpInst.addOperand(MCOperand::createReg(DiscReg));
+    EmitToStreamer(*OutStreamer, TmpInst);
+    return;
+  }
+
   case AArch64::TCRETURNri:
   case AArch64::TCRETURNrix16x17:
   case AArch64::TCRETURNrix17:
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 03f0778bae59d..657324d2307c5 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -817,10 +817,44 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
   MachineInstr &MI = *MBBI;
   MachineOperand &RVTarget = MI.getOperand(0);
   assert(RVTarget.isGlobal() && "invalid operand for attached call");
-  MachineInstr *OriginalCall =
-      createCall(MBB, MBBI, TII, MI.getOperand(1),
-                 // Regmask starts after the RV and call targets.
-                 /*RegMaskStartIdx=*/2);
+
+  MachineInstr *OriginalCall = nullptr;
+
+  if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
+    // Pointer auth call.
+    MachineOperand &Key = MI.getOperand(2);
+    assert((Key.getImm() == 0 || Key.getImm() == 1) &&
+           "invalid key for ptrauth call");
+    MachineOperand &IntDisc = MI.getOperand(3);
+    MachineOperand &AddrDisc = MI.getOperand(4);
+
+    OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BLRA))
+                       .getInstr();
+    OriginalCall->addOperand(MI.getOperand(1));
+    OriginalCall->addOperand(Key);
+    OriginalCall->addOperand(IntDisc);
+    OriginalCall->addOperand(AddrDisc);
+
+    unsigned RegMaskStartIdx = 5;
+    // Skip register arguments. Those are added during ISel, but are not
+    // needed for the concrete branch.
+    while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
+      auto MOP = MI.getOperand(RegMaskStartIdx);
+      assert(MOP.isReg() && "can only add register operands");
+      OriginalCall->addOperand(MachineOperand::CreateReg(
+          MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
+          /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
+      RegMaskStartIdx++;
+    }
+    for (const MachineOperand &MO :
+         llvm::drop_begin(MI.operands(), RegMaskStartIdx))
+      OriginalCall->addOperand(MO);
+  } else {
+    assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
+    OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
+                              // Regmask starts after the RV and call targets.
+                              /*RegMaskStartIdx=*/2);
+  }
 
   BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
                      .addReg(AArch64::FP, RegState::Define)
@@ -1529,6 +1563,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
    case AArch64::LDR_PPXI:
      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
    case AArch64::BLR_RVMARKER:
+   case AArch64::BLRA_RVMARKER:
      return expandCALL_RVMARKER(MBB, MBBI);
    case AArch64::BLR_BTI:
      return expandCALL_BTI(MBB, MBBI);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c1ca78af5cda8..c5f08e8b1817a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -329,6 +329,40 @@ static bool isZeroingInactiveLanes(SDValue Op) {
   }
 }
 
+// Split a ptrauth discriminator into (integer discriminator, address
+// discriminator).  The integer part is returned as an i64 target constant;
+// the address part is NoRegister when only a 16-bit constant was given, and
+// the unmodified input when no 16-bit constant could be extracted.
+static std::tuple<SDValue, SDValue>
+extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
+  SDLoc DL(Disc);
+
+  // A discriminator is either a ptrauth.blend(addr, const) or a single value
+  // that may turn out to be a constant or a plain address discriminator.
+  const bool IsBlend =
+      Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+      Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend;
+  SDValue AddrPart = IsBlend ? Disc->getOperand(1) : SDValue();
+  SDValue ConstPart = IsBlend ? Disc->getOperand(2) : Disc;
+
+  // Fold the integer part only when it is a constant fitting in 16 bits.
+  // Otherwise hand the whole value back as the address discriminator, with
+  // a zero integer discriminator, so it gets computed separately.
+  auto *ConstNode = dyn_cast<ConstantSDNode>(ConstPart);
+  const bool Foldable = ConstNode && isUInt<16>(ConstNode->getZExtValue());
+  if (!Foldable)
+    return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
+
+  // No address part: use NoRegister as a placeholder (later replaced
+  // with XZR, or dropped in favor of a Z-variant instruction).
+  if (!AddrPart)
+    AddrPart = DAG->getRegister(AArch64::NoRegister, MVT::i64);
+
+  SDValue IntPart =
+      DAG->getTargetConstant(ConstNode->getZExtValue(), DL, MVT::i64);
+  return std::make_tuple(IntPart, AddrPart);
+}
+
 AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                                              const AArch64Subtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
@@ -2410,6 +2444,9 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::ADRP)
     MAKE_CASE(AArch64ISD::ADR)
     MAKE_CASE(AArch64ISD::ADDlow)
+    MAKE_CASE(AArch64ISD::AUTH_CALL)
+    MAKE_CASE(AArch64ISD::AUTH_TC_RETURN)
+    MAKE_CASE(AArch64ISD::AUTH_CALL_RVMARKER)
     MAKE_CASE(AArch64ISD::LOADgot)
     MAKE_CASE(AArch64ISD::RET_GLUE)
     MAKE_CASE(AArch64ISD::BRCOND)
@@ -8410,10 +8447,30 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     InGlue = Chain.getValue(1);
   }
 
+  unsigned Opc = IsTailCall ? AArch64ISD::TC_RETURN : AArch64ISD::CALL;
+
   std::vector<SDValue> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
 
+  // Calls with operand bundle "clang.arc.attachedcall" are special. They should
+  // be expanded to the call, directly followed by a special marker sequence and
+  // a call to an ObjC library function.  Use CALL_RVMARKER to do that.
+  if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
+    assert(!IsTailCall &&
+           "tail calls cannot be marked with clang.arc.attachedcall");
+    Opc = AArch64ISD::CALL_RVMARKER;
+
+    // Add a target global address for the retainRV/claimRV runtime function
+    // just before the call target.
+    Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
+    auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
+    Ops.insert(Ops.begin() + 1, GA);
+  } else if (CallConv == CallingConv::ARM64EC_Thunk_X64) {
+    Opc = AArch64ISD::CALL_ARM64EC_TO_X64;
+  } else if (GuardWithBTI)
+    Opc = AArch64ISD::CALL_BTI;
+
   if (IsTailCall) {
     // Each tail call may have to adjust the stack by a different amount, so
     // this information must travel along with the operation for eventual
@@ -8421,6 +8478,26 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
   }
 
+  if (CLI.PAI) {
+    const uint64_t Key = CLI.PAI->Key;
+    // Authenticated calls only support IA and IB.
+    if (Key > 1)
+      report_fatal_error("Unsupported key kind for authenticating call");
+
+    // Split the discriminator into address/integer components.
+    SDValue AddrDisc, IntDisc;
+    std::tie(IntDisc, AddrDisc) =
+        extractPtrauthBlendDiscriminators(CLI.PAI->Discriminator, &DAG);
+
+    if (Opc == AArch64ISD::CALL_RVMARKER)
+      Opc = AArch64ISD::AUTH_CALL_RVMARKER;
+    else
+      Opc = IsTailCall ? AArch64ISD::AUTH_TC_RETURN : AArch64ISD::AUTH_CALL;
+    Ops.push_back(DAG.getTargetConstant(Key, DL, MVT::i32));
+    Ops.push_back(IntDisc);
+    Ops.push_back(AddrDisc);
+  }
+
   // Add argument registers to the end of the list so that they are known live
   // into the call.
   for (auto &RegToPass : RegsToPass)
@@ -8458,8 +8535,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   // actual call instruction.
   if (IsTailCall) {
     MF.getFrameInfo().setHasTailCall();
-    SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
-
+    SDValue Ret = DAG.getNode(Opc, DL, NodeTys, Ops);
     if (IsCFICall)
       Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
 
@@ -8468,29 +8544,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     return Ret;
   }
 
-  unsigned CallOpc = AArch64ISD::CALL;
-  // Calls with operand bundle "clang.arc.attachedcall" are special. They should
-  // be expanded to the call, directly followed by a special marker sequence and
-  // a call to an ObjC library function.  Use CALL_RVMARKER to do that.
-  if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
-    assert(!IsTailCall &&
-           "tail calls cannot be marked with clang.arc.attachedcall");
-    CallOpc = AArch64ISD::CALL_RVMARKER;
-
-    // Add a target global address for the retainRV/claimRV runtime function
-    // just before the call target.
-    Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
-    auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
-    Ops.insert(Ops.begin() + 1, GA);
-  } else if (CallConv == CallingConv::ARM64EC_Thunk_X64) {
-    CallOpc = AArch64ISD::CALL_ARM64EC_TO_X64;
-  } else if (GuardWithBTI) {
-    CallOpc = AArch64ISD::CALL_BTI;
-  }
-
   // Returns a chain and a flag for retval copy to use.
-  Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
-
+  Chain = DAG.getNode(Opc, DL, NodeTys, Ops);
   if (IsCFICall)
     Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index fbdc4de5617fe..5297a5d12ec13 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -58,6 +58,16 @@ enum NodeType : unsigned {
 
   CALL_BTI, // Function call followed by a BTI instruction.
 
+  // Function call, authenticating the callee value first:
+  // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
+  AUTH_CALL,
+  // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
+  // operands.
+  AUTH_TC_RETURN,
+
+  // Authenticated variant of CALL_RVMARKER.
+  AUTH_CALL_RVMARKER,
+
   COALESCER_BARRIER,
 
   SMSTART,
@@ -911,6 +921,8 @@ class AArch64TargetLowering : public TargetLowering {
     return true;
   }
 
+  bool supportPtrAuthBundles() const override { return true; }
+
   bool supportKCFIBundles() const override { return true; }
 
   MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 55fecc4b4845f..72e35f9442031 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2511,6 +2511,8 @@ bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
   case AArch64::TCRETURNrix17:
   case AArch64::TCRETURNrinotx16:
   case AArch64::TCRETURNriALL:
+  case AArch64::AUTH_TCRETURN:
+  case AArch64::AUTH_TCRETURN_BTI:
     return true;
   }
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 17d96370c04a5..c0cda5315b8d3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1564,6 +1564,30 @@ let Predicates = [HasComplxNum, HasNEON] in {
                          (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
 }
 
+def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL",
+                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
+                                                   SDTCisVT<1, i32>,
+                                                   SDTCisVT<2, i64>,
+                                                   SDTCisVT<3, i64>]>,
+                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                              SDNPVariadic]>;
+
+def AArch64authtcret: SDNode<"AArch64ISD::AUTH_TC_RETURN",
+                             SDTypeProfile<0, 5, [SDTCisPtrTy<0>,
+                                                  SDTCisVT<2, i32>,
+                                                  SDTCisVT<3, i64>,
+                                                  SDTCisVT<4, i64>]>,
+                             [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
+
+def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER",
+                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
+                                                       SDTCisPtrTy<1>,
+                                                       SDTCisVT<2, i32>,
+                                                       SDTCisVT<3, i64>,
+                                                       SDTCisVT<4, i64>]>,
+                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                                  SDNPVariadic]>;
+
 // v8.3a Pointer Authentication
 // These instructions inhabit part of the hint space and so can be used for
 // armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
@@ -1692,6 +1716,35 @@ let Predicates = [HasPAuth] in {
     def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
   }
 
+  // BLRA pseudo, generalized version of BLRAA/BLRAB/Z.
+  // This directly manipulates x16/x17, which are the only registers the OS
+  // guarantees are safe to use for sensitive operations.
+  def BLRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
+                                 GPR64noip:$AddrDisc),
+                    [(AArch64authcall GPR64noip:$Rn, timm:$Key, timm:$Disc,
+                                      GPR64noip:$AddrDisc)]>, Sched<[]> {
+    let isCodeGenOnly = 1;
+    let hasSideEffects = 1;
+    let mayStore = 0;
+    let mayLoad = 0;
+    let isCall = 1;
+    let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
+    let Defs = [X17,LR];
+    let Uses = [SP];
+  }
+
+  def BLRA_RVMARKER : Pseudo<
+        (outs), (ins i64imm:$rvfunc, GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
+                     GPR64noip:$AddrDisc),
+        [(AArch64authcall_rvmarker tglobaladdr:$rvfunc,
+                                   GPR64noip:$Rn, timm:$Key, timm:$Disc,
+                                   GPR64noip:$AddrDisc)]>, Sched<[]> {
+    let isCodeGenOnly = 1;
+    let isCall = 1;
+    let Defs = [X17,LR];
+    let Uses = [SP];
+  }
+
   let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
     def RETAA   : AuthReturn<0b010, 0, "retaa">;
     def RETAB   : AuthReturn<0b010, 1, "retab">;
@@ -1702,6 +1755,32 @@ let Predicates = [HasPAuth] in {
   defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
   defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;
 
+  // Size 16: 4 fixed + 8 variable (discriminator); NOTE(review): sums to 12.
+  let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Size = 16,
+      Uses = [SP] in {
+    def AUTH_TCRETURN
+      : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff, i32imm:$Key,
+                            i64imm:$Disc, tcGPR64:$AddrDisc),
+               []>, Sched<[WriteBrReg]>;
+    def AUTH_TCRETURN_BTI
+      : Pseudo<(outs), (ins tcGPRx16x17:$dst, i32imm:$FPDiff, i32imm:$Key,
+                            i64imm:$Disc, tcGPR64:$AddrDisc),
+               []>, Sched<[WriteBrReg]>;
+  }
+
+  let Predicates = [TailCallAny] in
+    def : Pat<(AArch64authtcret tcGPR64:$dst, (i32 timm:$FPDiff), (i32 timm:$Key),
+                                (i64 timm:$Disc), tcGPR64:$AddrDisc),
+              (AUTH_TCRETURN tcGPR64:$dst, imm:$FPDiff, imm:$Key, imm:$Disc,
+                             tcGPR64:$AddrDisc)>;
+
+  let Predicates = [TailCallX16X17] in
+    def : Pat<(AArch64authtcret tcGPRx16x17:$dst, (i32 timm:$FPDiff),
+                                (i32 timm:$Key), (i64 timm:$Disc),
+                                tcGPR64:$AddrDisc),
+              (AUTH_TCRETURN_BTI tcGPRx16x17:$dst, imm:$FPDiff, imm:$Key,
+                                 imm:$Disc, tcGPR64:$AddrDisc)>;
+
 }
 
 // v9.5-A pointer authentication extensions
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index c4197ff73187a..53c35aa835bbc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64CallLowering.h"
+#include "AArch64GlobalISelUtils.h"
 #include "AArch64ISelLowering.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64RegisterInfo.h"
@@ -50,6 +51,7 @@
 #define DEBUG_TYPE "aarch64-call-lowering"
 
 using namespace llvm;
+using namespace AArch64GISelUtils;
 
 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
   : CallLowering(&TLI) {}
@@ -1012,25 +1014,37 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
 }
 
 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
-                              bool IsTailCall) {
+                              bool IsTailCall,
+                              std::optional<CallLowering::PointerAuthInfo> &PAI,
+                              MachineRegisterInfo &MRI) {
   const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>();
 
-  if (!IsTailCall)
-    return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
+  if (!IsTailCall) {
+    if (!PAI)
+      return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
+    // Authenticated calls must be indirect and use key 0 (IA) or 1 (IB).
+    assert(IsIndirect && "authenticated direct call");
+    assert((PAI->Key == 0 || PAI->Key == 1) && "invalid ptrauth key");
+    return AArch64::BLRA;
+  }
 
   if (!IsIndirect)
     return AArch64::TCRETURNdi;
 
-  // When BTI or PAuthLR are enabled, there are restrictions on using x16 and
-  // x17 to hold the function pointer.
+  // BTI and PAuthLR restrict which of x16/x17 may hold the function pointer.
+  // NOTE(review): the PAuthLR-only path below ignores PAI -- confirm intent.
   if (FuncInfo->branchTargetEnforcement()) {
-    if (FuncInfo->branchProtectionPAuthLR())
+    if (PAI)
+      return AArch64::AUTH_TCRETURN_BTI;
+    else if (FuncInfo->branchProtectionPAuthLR())
       return AArch64::TCRETURNrix17;
     else
       return AArch64::TCRETURNrix16x17;
   } else if (FuncInfo->branchProtectionPAuthLR())
     return AArch64::TCRETURNrinotx16;
 
+  if (PAI)
+    return AArch64::AUTH_TCRETURN;
   return AArch64::TCRETURNri;
 }
 
@@ -1066,14 +1080,6 @@ bool AArch64CallLowering::lowerTailCall(
                    Info.CallConv != CallingConv::Tail &&
                    Info.CallConv != CallingConv::SwiftTail;
 
-  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
-  // register class. Until we can do that, we should fall back here.
-  if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
-    LLVM_DEBUG(
-        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
-    return false;
-  }
-
   // Find out which ABI gets to decide where things go.
   CallingConv::ID CalleeCC = Info.CallConv;
   CCAssignFn *AssignFnFixed;
@@ -1084,17 +1090,39 @@ bool AArch64CallLowering::lowerTailCall(
   if (!IsSibCall)
     CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 
-  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
+  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true, Info.PAI, MRI);
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);
 
+  // Register info is needed both to constrain operands and to build the mask.
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  auto TRI = Subtarget.getRegisterInfo();
+
   // Byte offset for the tail call. When we are sibcalling, this will always
   // be 0.
   MIB.addImm(0);
 
+  // Authenticated tail calls always take key/discriminator arguments.
+  if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) {
+    assert((Info.PAI->Key == 0 || Info.PAI->Key == 1) && "invalid key");
+    MIB.addImm(Info.PAI->Key);
+    // Split the discriminator into its constant and register parts.
+    Register AddrDisc = 0;
+    uint16_t IntDisc = 0;
+    std::tie(IntDisc, AddrDisc) =
+        extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);
+    // Operands 3 and 4: integer discriminator, then address discriminator.
+    MIB.addImm(IntDisc);
+    MIB.addUse(AddrDisc);
+    if (AddrDisc != AArch64::NoRegister) {
+      MIB->getOperand(4).setReg(constrainOperandRegClass(
+          MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
+          *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(),
+          MIB->getOperand(4), 4));
+    }
+  }
+
   // Tell the call which registers are clobbered.
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  auto TRI = Subtarget.getRegisterInfo();
   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
   if (Subtarget.hasCustomCallingConv())
     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
@@ -1294,7 +1322,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // be expanded to the call, directly followed by a special marker sequence and
   // a call to an ObjC library function.
   if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
-    Opc = AArch64::BLR_RVMARKER;
+    Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
   // A call to a returns twice function like setjmp must be followed by a bti
   // instruction.
   else if (Info.CB && Info.CB->hasFnAttr(Attribute::ReturnsTwice) &&
@@ -1310,13 +1338,13 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
       Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
     }
-    Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
+    Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.PAI, MRI);
   }
 
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   unsigned CalleeOpNo = 0;
 
-  if (Opc == AArch64::BLR_RVMARKER) {
+  if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
     // Add a target global address for the retainRV/claimRV runtime function
     // just before the call target.
     Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
@@ -1342,6 +1370,26 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
 
+  if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
+    assert((Info.PAI->Key == 0 || Info.PAI->Key == 1) && "invalid key");
+    MIB.addImm(Info.PAI->Key);
+    // Split the discriminator into its constant and register parts.
+    Register AddrDisc = 0;
+    uint16_t IntDisc = 0;
+    std::tie(IntDisc, AddrDisc) =
+        extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);
+    // The discriminators follow the key: integer part, then address part.
+    MIB.addImm(IntDisc);
+    MIB.addUse(AddrDisc);
+    if (AddrDisc != AArch64::NoRegister) {
+      constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
+                               *MF.getSubtarget().getRegBankInfo(), *MIB,
+                               MIB->getDesc(), MIB->getOperand(CalleeOpNo + 3),
+                               CalleeOpNo + 3);
+    }
+  }
+
+  // Tell the call which registers are clobbered.
   if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
     TRI->UpdateCustomCallPreservedMask(MF, &Mask);
   MIB.addRegMask(Mask);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 80fe4bcb8b58f..a2b909205ea84 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -96,6 +96,35 @@ bool AArch64GISelUtils::tryEmitBZero(MachineInstr &MI,
   return true;
 }
 
+std::tuple<uint16_t, Register>
+AArch64GISelUtils::extractPtrauthBlendDiscriminators(Register Disc,
+                                                     MachineRegisterInfo &MRI) {
+  // Fallback result: no integer part, the whole value is the address part.
+  const std::tuple<uint16_t, Register> Whole(0, Disc);
+  // A constant that fits in 16 bits is purely an integer discriminator.
+  if (auto WholeCst = getIConstantVRegVal(Disc, MRI)) {
+    const uint64_t Val = WholeCst->getZExtValue();
+    if (!isUInt<16>(Val))
+      return Whole;
+    return std::make_tuple(uint16_t(Val), Register(AArch64::NoRegister));
+  }
+
+  // Otherwise, only a ptrauth_blend intrinsic can be split into two parts.
+  const MachineInstr *Blend = MRI.getVRegDef(Disc);
+  const bool IsBlend =
+      Blend && Blend->getOpcode() == TargetOpcode::G_INTRINSIC &&
+      Blend->getOperand(1).getIntrinsicID() == Intrinsic::ptrauth_blend;
+  if (!IsBlend)
+    return Whole;
+
+  // Crack the blend only when its integer operand is a 16-bit constant.
+  auto IntCst = getIConstantVRegVal(Blend->getOperand(3).getReg(), MRI);
+  if (!IntCst || !isUInt<16>(IntCst->getZExtValue()))
+    return Whole;
+  return std::make_tuple(uint16_t(IntCst->getZExtValue()),
+                         Blend->getOperand(2).getReg());
+}
+
 void AArch64GISelUtils::changeFCMPPredToAArch64CC(
     const CmpInst::Predicate P, AArch64CC::CondCode &CondCode,
     AArch64CC::CondCode &CondCode2) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
index 791db7efaf0be..9ef833f0fc0ca 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
@@ -53,6 +53,12 @@ bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred,
 /// \returns true if \p MI was replaced with a G_BZERO.
 bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder, bool MinSize);
 
+/// Analyze a ptrauth discriminator value to try to find the constant integer
+/// and address parts, cracking a ptrauth_blend intrinsic if there is one.
+/// \returns integer/address disc. parts, with NoRegister if no address disc.
+std::tuple<uint16_t, Register>
+extractPtrauthBlendDiscriminators(Register Disc, MachineRegisterInfo &MRI);
+
 /// Find the AArch64 condition codes necessary to represent \p P for a scalar
 /// floating point comparison.
 ///
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-invoke.ll b/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-invoke.ll
new file mode 100644
index 0000000000000..bd43776687f2e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-invoke.ll
@@ -0,0 +1,183 @@
+; RUN: llc -mtriple arm64e-apple-darwin -o - -global-isel -global-isel-abort=1 -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK
+
+; CHECK-LABEL: _test_invoke_ia_0:
+; CHECK-NEXT: [[FNBEGIN:L.*]]:
+; CHECK-NEXT:  .cfi_startproc
+; CHECK-NEXT:  .cfi_personality 155, ___gxx_personality_v0
+; CHECK-NEXT:  .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT:  stp x20, x19, [sp, #-32]!
+; CHECK-NEXT:  stp x29, x30, [sp, #16]
+; CHECK-NEXT:  .cfi_def_cfa_offset 32
+; CHECK-NEXT:  .cfi_offset w30, -8
+; CHECK-NEXT:  .cfi_offset w29, -16
+; CHECK-NEXT:  .cfi_offset w19, -24
+; CHECK-NEXT:  .cfi_offset w20, -32
+; CHECK-NEXT: [[PRECALL:L.*]]:
+; CHECK-NEXT:  blraaz x0
+; CHECK-NEXT:  mov x19, x0
+; CHECK-NEXT: [[POSTCALL:L.*]]:
+; CHECK-NEXT: [[POSTCALL_BB:L.*]]:
+; CHECK-NEXT:  bl _foo
+; CHECK-NEXT:  mov x0, x19
+; CHECK-NEXT:  ldp x29, x30, [sp, #16]
+; CHECK-NEXT:  ldp x20, x19, [sp], #32
+; CHECK-NEXT:  ret
+; CHECK-NEXT: [[LPADBB:LBB[0-9_]+]]:
+; CHECK-NEXT: [[LPAD:L.*]]:
+; CHECK-NEXT:  mov w19, #-1
+; CHECK-NEXT:  b [[POSTCALL_BB]]
+
+; CHECK-LABEL: GCC_except_table{{.*}}:
+; CHECK-NEXT: [[EXCEPT]]:
+; CHECK:       .uleb128 [[POSTCALL]]-[[PRECALL]] ;   Call between [[PRECALL]] and [[POSTCALL]]
+; CHECK-NEXT:  .uleb128 [[LPAD]]-[[FNBEGIN]]     ;     jumps to [[LPAD]]
+; CHECK-NEXT:  .byte 0                           ;   On action: cleanup
+
+define i32 @test_invoke_ia_0(ptr %arg0) #0 personality ptr @__gxx_personality_v0 {
+  %tmp0 = invoke i32 %arg0() [ "ptrauth"(i32 0, i64 0) ] to label %continuebb
+            unwind label %unwindbb
+
+unwindbb:
+  %tmp1 = landingpad { ptr, i32 } cleanup
+  call void @foo()
+  ret i32 -1
+
+continuebb:
+  call void @foo()
+  ret i32 %tmp0
+}
+
+ at _ZTIPKc = external constant ptr
+ at hello_str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+
+; CHECK-LABEL: _test_invoke_ib_42_catch:
+; CHECK-NEXT: [[FNBEGIN:L.*]]:
+; CHECK-NEXT:         .cfi_startproc
+; CHECK-NEXT:         .cfi_personality 155, ___gxx_personality_v0
+; CHECK-NEXT:         .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT:         stp x20, x19, [sp, #-32]!
+; CHECK-NEXT:         stp x29, x30, [sp, #16]
+; CHECK-NEXT:         .cfi_def_cfa_offset 32
+; CHECK-NEXT:         .cfi_offset w30, -8
+; CHECK-NEXT:         .cfi_offset w29, -16
+; CHECK-NEXT:         .cfi_offset w19, -24
+; CHECK-NEXT:         .cfi_offset w20, -32
+; CHECK-NEXT:         mov x19, x0
+; CHECK-NEXT:         mov w0, #8
+; CHECK-NEXT:         bl ___cxa_allocate_exception
+; CHECK-NEXT: Lloh{{.*}}:
+; CHECK-NEXT:         adrp [[STR_TMP:x[0-9]+]], l_hello_str at PAGE
+; CHECK-NEXT: Lloh{{.*}}:
+; CHECK-NEXT:         add [[STR:x[0-9]+]], [[STR_TMP]], l_hello_str at PAGEOFF
+; CHECK-NEXT:         str [[STR]], [x0]
+; CHECK-NEXT: [[PRECALL:L.*]]:
+; CHECK-NEXT: Lloh{{.*}}:
+; CHECK-NEXT:         adrp x[[ZTI_TMP:[0-9]+]], __ZTIPKc at GOTPAGE
+; CHECK-NEXT: Lloh{{.*}}:
+; CHECK-NEXT:         ldr x1, [x[[ZTI_TMP]], __ZTIPKc at GOTPAGEOFF]
+; CHECK-NEXT:         mov x2, #0
+; CHECK-NEXT:         mov x17, #42
+; CHECK-NEXT:         blrab x19, x17
+; CHECK-NEXT: [[POSTCALL:L.*]]:
+; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT:         brk #0x1
+
+; CHECK-NEXT: [[LPADBB:LBB[0-9_]+]]:
+; CHECK-NEXT: [[LPAD:L.*]]:
+; CHECK-NEXT:         mov x[[TMP:[0-9]+]], x1
+; CHECK-NEXT:         bl ___cxa_begin_catch
+; CHECK-NEXT:         cmp w[[TMP]], #2
+
+; CHECK-NEXT:         b.ne [[EXITBB:LBB[0-9_]+]]
+; CHECK-NEXT: ; %bb.3:
+; CHECK-NEXT:         bl _bar
+; CHECK-NEXT: [[EXITBB]]:
+; CHECK-NEXT:         bl _foo
+; CHECK-NEXT:         bl ___cxa_end_catch
+; CHECK-NEXT:         ldp x29, x30, [sp, #16]
+; CHECK-NEXT:         ldp x20, x19, [sp], #32     ; 16-byte Folded Reload
+; CHECK-NEXT:         ret
+; CHECK-NEXT:         .loh {{.*}}
+; CHECK-NEXT:         .loh {{.*}}
+; CHECK-NEXT: [[FNEND:L.*]]:
+
+; CHECK-LABEL: GCC_except_table{{.*}}:
+; CHECK-NEXT: [[EXCEPT]]:
+; CHECK-NEXT:         .byte   255                     ; @LPStart Encoding = omit
+; CHECK-NEXT:         .byte   155                     ; @TType Encoding = indirect pcrel sdata4
+; CHECK-NEXT:         .uleb128 [[TT:L.*]]-[[TTREF:L.*]]
+; CHECK-NEXT: [[TTREF]]:
+; CHECK-NEXT:         .byte   1                       ; Call site Encoding = uleb128
+; CHECK-NEXT:         .uleb128 [[CSEND:L.*]]-[[CSBEGIN:L.*]]
+; CHECK-NEXT: [[CSBEGIN]]:
+; CHECK-NEXT:         .uleb128 [[FNBEGIN]]-[[FNBEGIN]]  ; >> Call Site 1 <<
+; CHECK-NEXT:         .uleb128 [[PRECALL]]-[[FNBEGIN]]  ;   Call between [[FNBEGIN]] and [[PRECALL]]
+; CHECK-NEXT:         .byte   0                         ;     has no landing pad
+; CHECK-NEXT:         .byte   0                         ;   On action: cleanup
+; CHECK-NEXT:         .uleb128 [[PRECALL]]-[[FNBEGIN]]  ; >> Call Site 2 <<
+; CHECK-NEXT:         .uleb128 [[POSTCALL]]-[[PRECALL]] ;   Call between [[PRECALL]] and [[POSTCALL]]
+; CHECK-NEXT:         .uleb128 [[LPAD]]-[[FNBEGIN]]     ;     jumps to [[LPAD]]
+; CHECK-NEXT:         .byte   3                         ;   On action: 2
+; CHECK-NEXT:         .uleb128 [[POSTCALL]]-[[FNBEGIN]] ; >> Call Site 3 <<
+; CHECK-NEXT:         .uleb128 [[FNEND]]-[[POSTCALL]]   ;   Call between [[POSTCALL]] and [[FNEND]]
+; CHECK-NEXT:         .byte   0                         ;     has no landing pad
+; CHECK-NEXT:         .byte   0                         ;   On action: cleanup
+; CHECK-NEXT: [[CSEND]]:
+
+; CHECK-NEXT:          .byte   1                       ; >> Action Record 1 <<
+; CHECK-NEXT:                                          ;   Catch TypeInfo 1
+; CHECK-NEXT:          .byte   0                       ;   No further actions
+; CHECK-NEXT:          .byte   2                       ; >> Action Record 2 <<
+; CHECK-NEXT:                                          ;   Catch TypeInfo 2
+; CHECK-NEXT:          .byte   125                     ;   Continue to action 1
+; CHECK-NEXT:          .p2align   2
+; CHECK-NEXT:                                          ; >> Catch TypeInfos <<
+; CHECK-NEXT: [[TI:L.*]]:                              ; TypeInfo 2
+; CHECK-NEXT:          .long   __ZTIPKc at GOT-[[TI]]
+; CHECK-NEXT:          .long   0                       ; TypeInfo 1
+
+; CHECK-NEXT: [[TT]]:
+
+define void @test_invoke_ib_42_catch(ptr %fptr) #0 personality ptr @__gxx_personality_v0 {
+  %tmp0 = call ptr @__cxa_allocate_exception(i64 8)
+  store ptr getelementptr inbounds ([6 x i8], ptr @hello_str, i64 0, i64 0), ptr %tmp0, align 8
+  invoke void %fptr(ptr %tmp0, ptr @_ZTIPKc, ptr null) [ "ptrauth"(i32 1, i64 42) ]
+          to label %continuebb unwind label %catchbb
+
+catchbb:
+  %tmp2 = landingpad { ptr, i32 }
+          catch ptr @_ZTIPKc
+          catch ptr null
+  %tmp3 = extractvalue { ptr, i32 } %tmp2, 0
+  %tmp4 = extractvalue { ptr, i32 } %tmp2, 1
+  %tmp5 = call i32 @llvm.eh.typeid.for(ptr @_ZTIPKc)
+  %tmp6 = icmp eq i32 %tmp4, %tmp5
+  %tmp7 = call ptr @__cxa_begin_catch(ptr %tmp3)
+  br i1 %tmp6, label %PKc_catchbb, label %any_catchbb
+
+PKc_catchbb:
+  call void @bar(ptr %tmp7)
+  br label %any_catchbb
+
+any_catchbb:
+  call void @foo()
+  call void @__cxa_end_catch()
+  ret void
+
+continuebb:
+  unreachable
+}
+
+declare void @foo()
+declare void @bar(ptr)
+
+declare i32 @__gxx_personality_v0(...)
+declare ptr @__cxa_allocate_exception(i64)
+declare void @__cxa_throw(ptr, ptr, ptr)
+declare i32 @llvm.eh.typeid.for(ptr)
+declare ptr @__cxa_begin_catch(ptr)
+declare void @__cxa_end_catch()
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll b/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
index 833a6d5b1d1da..3d8a5976559ba 100644
--- a/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
+++ b/llvm/test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple aarch64 -mattr=+bti < %s | FileCheck %s
-; RUN: llc -mtriple aarch64 -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mattr=+bti %s -verify-machineinstrs -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,FALLBACK
-
-; FALLBACK: remark: <unknown>:0:0: unable to translate instruction: call: '  tail call void %p()' (in function: bti_enabled)
+; RUN: llc -mtriple aarch64 -global-isel -mattr=+bti < %s | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64"
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-bti-call.ll b/llvm/test/CodeGen/AArch64/ptrauth-bti-call.ll
new file mode 100644
index 0000000000000..3a992929a8003
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-bti-call.ll
@@ -0,0 +1,105 @@
+; RUN: llc -mtriple arm64e-apple-darwin -mattr=+bti              -asm-verbose=false -o - %s | FileCheck %s
+; RUN: llc -mtriple arm64e-apple-darwin -mattr=+bti -global-isel -asm-verbose=false -o - %s | FileCheck %s
+; RUN: llc -mtriple arm64e-apple-darwin -mattr=+bti -fast-isel   -asm-verbose=false -o - %s | FileCheck %s
+
+; ptrauth tail-calls can only use x16/x17 with BTI.
+
+; CHECK-LABEL: _test_tailcall_ia_0:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  mov x16, x0
+; CHECK-NEXT:  braaz x16
+define i32 @test_tailcall_ia_0(i32 ()* %arg0) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 0) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ib_0:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  mov x16, x0
+; CHECK-NEXT:  brabz x16
+define i32 @test_tailcall_ib_0(i32 ()* %arg0) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 0) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ia_imm:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  mov x16, x0
+; CHECK-NEXT:  mov x17, #42
+; CHECK-NEXT:  braa x16, x17
+define i32 @test_tailcall_ia_imm(i32 ()* %arg0) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 42) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ib_imm:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  mov x16, x0
+; CHECK-NEXT:  mov x17, #42
+; CHECK-NEXT:  brab x16, x17
+define i32 @test_tailcall_ib_imm(i32 ()* %arg0) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 42) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ia_var:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  mov x16, x0
+; CHECK-NEXT:  ldr x0, [x1]
+; CHECK-NEXT:  braa x16, x0
+define i32 @test_tailcall_ia_var(i32 ()* %arg0, i64* %arg1) #0 {
+  %tmp0 = load i64, i64* %arg1
+  %tmp1 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 %tmp0) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_tailcall_ib_var:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  mov x16, x0
+; CHECK-NEXT:  ldr x0, [x1]
+; CHECK-NEXT:  brab x16, x0
+define i32 @test_tailcall_ib_var(i32 ()* %arg0, i64* %arg1) #0 {
+  %tmp0 = load i64, i64* %arg1
+  %tmp1 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 %tmp0) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_tailcall_ia_arg:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  mov x16, x0
+; CHECK-NEXT:  braa x16, x1
+define i32 @test_tailcall_ia_arg(i32 ()* %arg0, i64 %arg1) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 %arg1) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ib_arg:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  mov x16, x0
+; CHECK-NEXT:  brab x16, x1
+define i32 @test_tailcall_ib_arg(i32 ()* %arg0, i64 %arg1) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 %arg1) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ia_arg_ind:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  ldr x16, [x0]
+; CHECK-NEXT:  braa x16, x1
+define i32 @test_tailcall_ia_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+  %tmp0 = load i32 ()*, i32 ()** %arg0
+  %tmp1 = tail call i32 %tmp0() [ "ptrauth"(i32 0, i64 %arg1) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_tailcall_ib_arg_ind:
+; CHECK-NEXT:  bti c
+; CHECK-NEXT:  ldr x16, [x0]
+; CHECK-NEXT:  brab x16, x1
+define i32 @test_tailcall_ib_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+  %tmp0 = load i32 ()*, i32 ()** %arg0
+  %tmp1 = tail call i32 %tmp0() [ "ptrauth"(i32 1, i64 %arg1) ]
+  ret i32 %tmp1
+}
+
+attributes #0 = { nounwind "branch-target-enforcement"="true" }
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-call-rv-marker.ll b/llvm/test/CodeGen/AArch64/ptrauth-call-rv-marker.ll
new file mode 100644
index 0000000000000..9cf77b125e107
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-call-rv-marker.ll
@@ -0,0 +1,154 @@
+; RUN: llc -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -global-isel -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "arm64e-apple-iphoneos"
+
+declare i8* @foo0(i32)
+declare i8* @foo1()
+
+declare void @llvm.objc.release(i8*)
+declare i8* @llvm.objc.retainAutoreleasedReturnValue(i8*)
+declare i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8*)
+
+declare void @foo2(i8*)
+
+declare void @foo(i64, i64, i64)
+
+define void @rv_marker_ptrauth_blraa(i8* ()** %arg0, i64 %arg1) {
+; CHECK-LABEL: rv_marker_ptrauth_blraa
+; CHECK:         ldr [[ADDR:x[0-9]+]], [
+; CHECK-NEXT:    blraa [[ADDR]], x1
+; CHECK-NEXT:    mov x29, x29
+; CHECK-NEXT:    bl objc_retainAutoreleasedReturnValue
+;
+entry:
+  %tmp0 = load i8* ()*, i8* ()** %arg0
+  %call0 = call i8* %tmp0() [ "ptrauth"(i32 0, i64 %arg1), "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
+  tail call void @foo2(i8* %call0)
+  tail call void @llvm.objc.release(i8* %call0)
+  ret void
+}
+
+define void @rv_marker_ptrauth_blraa_unsafeClaim(i8* ()** %arg0, i64 %arg1) {
+; CHECK-LABEL: rv_marker_ptrauth_blraa_unsafeClaim
+; CHECK:         ldr [[ADDR:x[0-9]+]], [
+; CHECK-NEXT:    blraa [[ADDR]], x1
+; CHECK-NEXT:    mov x29, x29
+; CHECK-NEXT:    bl objc_unsafeClaimAutoreleasedReturnValue
+;
+entry:
+  %tmp0 = load i8* ()*, i8* ()** %arg0
+  %call0 = call i8* %tmp0() [ "ptrauth"(i32 0, i64 %arg1), "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.unsafeClaimAutoreleasedReturnValue) ]
+  tail call void @foo2(i8* %call0)
+  tail call void @llvm.objc.release(i8* %call0)
+  ret void
+}
+
+define void @rv_marker_ptrauth_blraa_disc_imm16(i8* ()** %arg0) {
+; CHECK-LABEL: rv_marker_ptrauth_blraa_disc_imm16
+; CHECK:         ldr [[ADDR:x[0-9]+]], [
+; CHECK-NEXT:    mov x17, #45431
+; CHECK-NEXT:    blraa [[ADDR]], x17
+; CHECK-NEXT:    mov x29, x29
+; CHECK-NEXT:    bl objc_retainAutoreleasedReturnValue
+; Key 0 (IA): the constant discriminator is materialized in x17 for blraa.
+  %tmp0 = load i8* ()*, i8* ()** %arg0
+  %call0 = call i8* %tmp0() [ "ptrauth"(i32 0, i64 45431), "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
+  tail call void @foo2(i8* %call0)
+  tail call void @llvm.objc.release(i8* %call0)
+  ret void
+}
+
+define void @rv_marker_ptrauth_blraa_multiarg(i8* (i64, i64, i64)** %arg0, i64 %arg1, i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: rv_marker_ptrauth_blraa_multiarg
+; CHECK:         mov  [[TMP:x[0-9]+]], x1
+; CHECK-DAG:     ldr [[ADDR:x[0-9]+]]
+; CHECK-DAG:     mov x0, x4
+; CHECK-DAG:     mov x1, x3
+; CHECK-NEXT:    blraa [[ADDR]], [[TMP]]
+; CHECK-NEXT:    mov x29, x29
+; CHECK-NEXT:   bl objc_retainAutoreleasedReturnValue
+;
+entry:
+  %tmp0 = load i8* (i64, i64, i64)*, i8* (i64, i64, i64)** %arg0
+  %call0 = call i8* %tmp0(i64 %c, i64 %b, i64 %a) [ "ptrauth"(i32 0, i64 %arg1), "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
+  tail call void @foo2(i8* %call0)
+  tail call void @llvm.objc.release(i8* %call0)
+  ret void
+}
+
+define void @rv_marker_ptrauth_blrab(i8* ()** %arg0, i64 %arg1) {
+; CHECK-LABEL: rv_marker_ptrauth_blrab
+; CHECK:         ldr [[ADDR:x[0-9]+]], [
+; CHECK-NEXT:    blrab [[ADDR]], x1
+; CHECK-NEXT:   mov x29, x29
+; CHECK-NEXT:   bl objc_retainAutoreleasedReturnValue
+;
+  %tmp0 = load i8* ()*, i8* ()** %arg0
+  %call0 = call i8* %tmp0() [ "ptrauth"(i32 1, i64 %arg1), "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
+  tail call void @foo2(i8* %call0)
+  tail call void @llvm.objc.release(i8* %call0)
+  ret void
+}
+
+define void @rv_marker_ptrauth_blrab_disc_imm16(i8* ()** %arg0) {
+; CHECK-LABEL: rv_marker_ptrauth_blrab_disc_imm16
+; CHECK:         ldr [[ADDR:x[0-9]+]], [
+; CHECK-NEXT:    mov x17, #256
+; CHECK-NEXT:    blrab [[ADDR]], x17
+; CHECK-NEXT:   mov x29, x29
+; CHECK-NEXT:   bl objc_retainAutoreleasedReturnValue
+;
+  %tmp0 = load i8* ()*, i8* ()** %arg0
+  %call0 = call i8* %tmp0() [ "ptrauth"(i32 1, i64 256), "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
+  tail call void @foo2(i8* %call0)
+  tail call void @llvm.objc.release(i8* %call0)
+  ret void
+}
+
+define void @rv_marker_ptrauth_blraaz(i8* ()** %arg0) {
+; CHECK-LABEL: rv_marker_ptrauth_blraaz
+; CHECK:         ldr [[ADDR:x[0-9]+]], [
+; CHECK-NEXT:    blraaz [[ADDR]]
+; CHECK-NEXT:   mov x29, x29
+; CHECK-NEXT:   bl objc_retainAutoreleasedReturnValue
+;
+  %tmp0 = load i8* ()*, i8* ()** %arg0
+  %call0 = call i8* %tmp0() [ "ptrauth"(i32 0, i64 0), "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
+  tail call void @foo2(i8* %call0)
+  tail call void @llvm.objc.release(i8* %call0)
+  ret void
+}
+
+define void @rv_marker_ptrauth_blrabz(i8* ()** %arg0) {
+; CHECK-LABEL: rv_marker_ptrauth_blrabz
+; CHECK:         ldr [[ADDR:x[0-9]+]], [
+; CHECK-NEXT:    blrabz [[ADDR]]
+; CHECK-NEXT:   mov x29, x29
+; CHECK-NEXT:   bl objc_retainAutoreleasedReturnValue
+;
+  %tmp0 = load i8* ()*, i8* ()** %arg0
+  %call0 = call i8* %tmp0() [ "ptrauth"(i32 1, i64 0), "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
+  tail call void @foo2(i8* %call0)
+  tail call void @llvm.objc.release(i8* %call0)
+  ret void
+}
+
+define void @rv_marker_ptrauth_blrabz_multiarg(i8* (i64, i64, i64)** %arg0, i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: rv_marker_ptrauth_blrabz_multiarg
+; CHECK:         mov  [[TMP:x[0-9]+]], x1
+; CHECK-DAG:     ldr [[ADDR:x[0-9]+]], [
+; CHECK-DAG:     mov x0, x3
+; CHECK-DAG:     mov x1, x2
+; CHECK-DAG:     mov x2, [[TMP]]
+; CHECK-NEXT:    blrabz [[ADDR]]
+; CHECK-NEXT:    mov x29, x29
+; CHECK-NEXT:    bl objc_retainAutoreleasedReturnValue
+;
+  %tmp0 = load i8* (i64, i64, i64)*, i8* (i64, i64, i64)** %arg0
+  %call0 = call i8* %tmp0(i64 %c, i64 %b, i64 %a) [ "ptrauth"(i32 1, i64 0), "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
+  tail call void @foo2(i8* %call0)
+  tail call void @llvm.objc.release(i8* %call0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-call.ll b/llvm/test/CodeGen/AArch64/ptrauth-call.ll
new file mode 100644
index 0000000000000..585ac0e75e6ac
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-call.ll
@@ -0,0 +1,195 @@
+; RUN: llc -mtriple arm64e-apple-darwin                                   -asm-verbose=false -o - %s | FileCheck %s
+; RUN: llc -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -asm-verbose=false -o - %s | FileCheck %s
+; RUN: llc -mtriple arm64e-apple-darwin -emit-call-site-info              -asm-verbose=false -o - %s | FileCheck %s
+
+; CHECK-LABEL: _test_call_ia_0:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  blraaz x0
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ia_0(i32 ()* %arg0) #0 {
+  %tmp0 = call i32 %arg0() [ "ptrauth"(i32 0, i64 0) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_call_ib_0:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  blrabz x0
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ib_0(i32 ()* %arg0) #0 {
+  %tmp0 = call i32 %arg0() [ "ptrauth"(i32 1, i64 0) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ia_0:
+; CHECK-NEXT:  braaz x0
+define i32 @test_tailcall_ia_0(i32 ()* %arg0) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 0) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ib_0:
+; CHECK-NEXT:  brabz x0
+define i32 @test_tailcall_ib_0(i32 ()* %arg0) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 0) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_call_ia_imm:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  mov x17, #42
+; CHECK-NEXT:  blraa x0, x17
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ia_imm(i32 ()* %arg0) #0 {
+  %tmp0 = call i32 %arg0() [ "ptrauth"(i32 0, i64 42) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_call_ib_imm:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  mov x17, #42
+; CHECK-NEXT:  blrab x0, x17
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ib_imm(i32 ()* %arg0) #0 {
+  %tmp0 = call i32 %arg0() [ "ptrauth"(i32 1, i64 42) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ia_imm:
+; CHECK-NEXT:  mov x16, #42
+; CHECK-NEXT:  braa x0, x16
+define i32 @test_tailcall_ia_imm(i32 ()* %arg0) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 42) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ib_imm:
+; CHECK-NEXT:  mov x16, #42
+; CHECK-NEXT:  brab x0, x16
+define i32 @test_tailcall_ib_imm(i32 ()* %arg0) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 42) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_call_ia_var:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  ldr x8, [x1]
+; CHECK-NEXT:  blraa x0, x8
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ia_var(i32 ()* %arg0, i64* %arg1) #0 {
+  %tmp0 = load i64, i64* %arg1
+  %tmp1 = call i32 %arg0() [ "ptrauth"(i32 0, i64 %tmp0) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_call_ib_var:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  ldr x8, [x1]
+; CHECK-NEXT:  blrab x0, x8
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ib_var(i32 ()* %arg0, i64* %arg1) #0 {
+  %tmp0 = load i64, i64* %arg1
+  %tmp1 = call i32 %arg0() [ "ptrauth"(i32 1, i64 %tmp0) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_tailcall_ia_var:
+; CHECK-NEXT:  ldr x1, [x1]
+; CHECK-NEXT:  braa x0, x1
+define i32 @test_tailcall_ia_var(i32 ()* %arg0, i64* %arg1) #0 {
+  %tmp0 = load i64, i64* %arg1
+  %tmp1 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 %tmp0) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_tailcall_ib_var:
+; CHECK-NEXT:  ldr x1, [x1]
+; CHECK-NEXT:  brab x0, x1
+define i32 @test_tailcall_ib_var(i32 ()* %arg0, i64* %arg1) #0 {
+  %tmp0 = load i64, i64* %arg1
+  %tmp1 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 %tmp0) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_call_ia_arg:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  blraa x0, x1
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ia_arg(i32 ()* %arg0, i64 %arg1) #0 {
+  %tmp0 = call i32 %arg0() [ "ptrauth"(i32 0, i64 %arg1) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_call_ib_arg:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  blrab x0, x1
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ib_arg(i32 ()* %arg0, i64 %arg1) #0 {
+  %tmp0 = call i32 %arg0() [ "ptrauth"(i32 1, i64 %arg1) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ia_arg:
+; CHECK-NEXT:  braa x0, x1
+define i32 @test_tailcall_ia_arg(i32 ()* %arg0, i64 %arg1) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 %arg1) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_tailcall_ib_arg:
+; CHECK-NEXT:  brab x0, x1
+define i32 @test_tailcall_ib_arg(i32 ()* %arg0, i64 %arg1) #0 {
+  %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 %arg1) ]
+  ret i32 %tmp0
+}
+
+; CHECK-LABEL: _test_call_ia_arg_ind:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  ldr x8, [x0]
+; CHECK-NEXT:  blraa x8, x1
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ia_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+  %tmp0 = load i32 ()*, i32 ()** %arg0
+  %tmp1 = call i32 %tmp0() [ "ptrauth"(i32 0, i64 %arg1) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_call_ib_arg_ind:
+; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:  ldr x8, [x0]
+; CHECK-NEXT:  blrab x8, x1
+; CHECK-NEXT:  ldp x29, x30, [sp], #16
+; CHECK-NEXT:  ret
+define i32 @test_call_ib_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+  %tmp0 = load i32 ()*, i32 ()** %arg0
+  %tmp1 = call i32 %tmp0() [ "ptrauth"(i32 1, i64 %arg1) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_tailcall_ia_arg_ind:
+; CHECK-NEXT:  ldr x0, [x0]
+; CHECK-NEXT:  braa x0, x1
+define i32 @test_tailcall_ia_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+  %tmp0 = load i32 ()*, i32 ()** %arg0
+  %tmp1 = tail call i32 %tmp0() [ "ptrauth"(i32 0, i64 %arg1) ]
+  ret i32 %tmp1
+}
+
+; CHECK-LABEL: _test_tailcall_ib_arg_ind:
+; CHECK-NEXT:  ldr x0, [x0]
+; CHECK-NEXT:  brab x0, x1
+define i32 @test_tailcall_ib_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+  %tmp0 = load i32 ()*, i32 ()** %arg0
+  %tmp1 = tail call i32 %tmp0() [ "ptrauth"(i32 1, i64 %arg1) ]
+  ret i32 %tmp1
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-invoke.ll b/llvm/test/CodeGen/AArch64/ptrauth-invoke.ll
new file mode 100644
index 0000000000000..bea699dbee37a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-invoke.ll
@@ -0,0 +1,189 @@
+; RUN: llc -mtriple arm64e-apple-darwin -o - %s | FileCheck %s --check-prefixes=CHECK,SDAG
+; RUN: llc -mtriple arm64e-apple-darwin -fast-isel -o - %s | FileCheck %s --check-prefixes=CHECK,FISEL
+
+; CHECK-LABEL: _test_invoke_ia_0:
+; CHECK-NEXT: [[FNBEGIN:L.*]]:
+; CHECK-NEXT:  .cfi_startproc
+; CHECK-NEXT:  .cfi_personality 155, ___gxx_personality_v0
+; CHECK-NEXT:  .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT:  stp x20, x19, [sp, #-32]!
+; CHECK-NEXT:  stp x29, x30, [sp, #16]
+; CHECK-NEXT:  .cfi_def_cfa_offset 32
+; CHECK-NEXT:  .cfi_offset w30, -8
+; CHECK-NEXT:  .cfi_offset w29, -16
+; CHECK-NEXT:  .cfi_offset w19, -24
+; CHECK-NEXT:  .cfi_offset w20, -32
+; CHECK-NEXT: [[PRECALL:L.*]]:
+; CHECK-NEXT:  blraaz x0
+; CHECK-NEXT: [[POSTCALL:L.*]]:
+; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT:  mov x19, x0
+; CHECK-NEXT: [[CALLBB:L.*]]:
+; CHECK-NEXT:  bl _foo
+; CHECK-NEXT:  mov x0, x19
+; CHECK-NEXT:  ldp x29, x30, [sp, #16]
+; CHECK-NEXT:  ldp x20, x19, [sp], #32
+; CHECK-NEXT:  ret
+; CHECK-NEXT: [[LPADBB:LBB[0-9_]+]]:
+; CHECK-NEXT: [[LPAD:L.*]]:
+; CHECK-NEXT:  mov w19, #-1
+; CHECK-NEXT:  b [[CALLBB]]
+
+; CHECK-LABEL: GCC_except_table{{.*}}:
+; CHECK-NEXT: [[EXCEPT]]:
+; CHECK:       .uleb128 [[POSTCALL]]-[[PRECALL]] ;   Call between [[PRECALL]] and [[POSTCALL]]
+; CHECK-NEXT:  .uleb128 [[LPAD]]-[[FNBEGIN]]     ;     jumps to [[LPAD]]
+; CHECK-NEXT:  .byte 0                           ;   On action: cleanup
+
+define i32 @test_invoke_ia_0(ptr %arg0) #0 personality ptr @__gxx_personality_v0 {
+  %tmp0 = invoke i32 %arg0() [ "ptrauth"(i32 0, i64 0) ] to label %continuebb
+            unwind label %unwindbb
+
+unwindbb:
+  %tmp1 = landingpad { ptr, i32 } cleanup
+  call void @foo()
+  ret i32 -1
+
+continuebb:
+  call void @foo()
+  ret i32 %tmp0
+}
+
+@_ZTIPKc = external constant ptr
+@hello_str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+
+; CHECK-LABEL: _test_invoke_ib_42_catch:
+; CHECK-NEXT: [[FNBEGIN:L.*]]:
+; CHECK-NEXT:         .cfi_startproc
+; CHECK-NEXT:         .cfi_personality 155, ___gxx_personality_v0
+; CHECK-NEXT:         .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
+; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT:         stp x20, x19, [sp, #-32]!
+; CHECK-NEXT:         stp x29, x30, [sp, #16]
+; CHECK-NEXT:         .cfi_def_cfa_offset 32
+; CHECK-NEXT:         .cfi_offset w30, -8
+; CHECK-NEXT:         .cfi_offset w29, -16
+; CHECK-NEXT:         .cfi_offset w19, -24
+; CHECK-NEXT:         .cfi_offset w20, -32
+; CHECK-NEXT:         mov x19, x0
+; CHECK-NEXT:         mov w0, #8
+; CHECK-NEXT:         bl ___cxa_allocate_exception
+; CHECK-NEXT: Lloh{{.*}}:
+; CHECK-NEXT:         adrp x8, l_hello_str@PAGE
+; CHECK-NEXT: Lloh{{.*}}:
+; CHECK-NEXT:         add x8, x8, l_hello_str@PAGEOFF
+; CHECK-NEXT:         str x8, [x0]
+; CHECK-NEXT: [[PRECALL:L.*]]:
+; CHECK-NEXT: Lloh{{.*}}:
+; CHECK-NEXT:         adrp x1, __ZTIPKc@GOTPAGE
+; CHECK-NEXT: Lloh{{.*}}:
+; CHECK-NEXT:         ldr x1, [x1, __ZTIPKc@GOTPAGEOFF]
+; CHECK-NEXT:         mov x2, #0
+; CHECK-NEXT:         mov x17, #42
+; CHECK-NEXT:         blrab x19, x17
+; CHECK-NEXT: [[POSTCALL:L.*]]:
+; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT:         brk #0x1
+; CHECK-NEXT: [[LPADBB:LBB[0-9_]+]]:
+; CHECK-NEXT: [[LPAD:L.*]]:
+; CHECK-NEXT:         mov x19, x1
+
+; SDAG-NEXT:          bl ___cxa_begin_catch
+; SDAG-NEXT:          cmp     w19, #2
+
+; FISEL-NEXT:         mov w20, #2
+; FISEL-NEXT:         bl ___cxa_begin_catch
+; FISEL-NEXT:         cmp w19, w20
+
+; CHECK-NEXT:         b.ne [[EXITBB:LBB[0-9_]+]]
+; CHECK-NEXT: ; %bb.3:
+; CHECK-NEXT:         bl _bar
+; CHECK-NEXT: [[EXITBB]]:
+; CHECK-NEXT:         bl _foo
+; CHECK-NEXT:         bl ___cxa_end_catch
+; CHECK-NEXT:         ldp x29, x30, [sp, #16]
+; CHECK-NEXT:         ldp x20, x19, [sp], #32
+; CHECK-NEXT:         ret
+; CHECK-NEXT:         .loh {{.*}}
+; CHECK-NEXT:         .loh {{.*}}
+; CHECK-NEXT: [[FNEND:L.*]]:
+
+; CHECK-LABEL: GCC_except_table{{.*}}:
+; CHECK-NEXT: [[EXCEPT]]:
+; CHECK-NEXT:         .byte   255                     ; @LPStart Encoding = omit
+; CHECK-NEXT:         .byte   155                     ; @TType Encoding = indirect pcrel sdata4
+; CHECK-NEXT:         .uleb128 [[TT:L.*]]-[[TTREF:L.*]]
+; CHECK-NEXT: [[TTREF]]:
+; CHECK-NEXT:         .byte   1                       ; Call site Encoding = uleb128
+; CHECK-NEXT:         .uleb128 [[CSEND:L.*]]-[[CSBEGIN:L.*]]
+; CHECK-NEXT: [[CSBEGIN]]:
+; CHECK-NEXT:         .uleb128 [[FNBEGIN]]-[[FNBEGIN]]  ; >> Call Site 1 <<
+; CHECK-NEXT:         .uleb128 [[PRECALL]]-[[FNBEGIN]]  ;   Call between [[FNBEGIN]] and [[PRECALL]]
+; CHECK-NEXT:         .byte   0                         ;     has no landing pad
+; CHECK-NEXT:         .byte   0                         ;   On action: cleanup
+; CHECK-NEXT:         .uleb128 [[PRECALL]]-[[FNBEGIN]]  ; >> Call Site 2 <<
+; CHECK-NEXT:         .uleb128 [[POSTCALL]]-[[PRECALL]] ;   Call between [[PRECALL]] and [[POSTCALL]]
+; CHECK-NEXT:         .uleb128 [[LPAD]]-[[FNBEGIN]]     ;     jumps to [[LPAD]]
+; CHECK-NEXT:         .byte   3                         ;   On action: 2
+; CHECK-NEXT:         .uleb128 [[POSTCALL]]-[[FNBEGIN]] ; >> Call Site 3 <<
+; CHECK-NEXT:         .uleb128 [[FNEND]]-[[POSTCALL]]   ;   Call between [[POSTCALL]] and [[FNEND]]
+; CHECK-NEXT:         .byte   0                         ;     has no landing pad
+; CHECK-NEXT:         .byte   0                         ;   On action: cleanup
+; CHECK-NEXT: [[CSEND]]:
+
+; CHECK-NEXT:          .byte   1                       ; >> Action Record 1 <<
+; CHECK-NEXT:                                          ;   Catch TypeInfo 1
+; CHECK-NEXT:          .byte   0                       ;   No further actions
+; CHECK-NEXT:          .byte   2                       ; >> Action Record 2 <<
+; CHECK-NEXT:                                          ;   Catch TypeInfo 2
+; CHECK-NEXT:          .byte   125                     ;   Continue to action 1
+; CHECK-NEXT:          .p2align   2
+; CHECK-NEXT:                                          ; >> Catch TypeInfos <<
+; CHECK-NEXT: [[TI:L.*]]:                              ; TypeInfo 2
+; CHECK-NEXT:          .long   __ZTIPKc@GOT-[[TI]]
+; CHECK-NEXT:          .long   0                       ; TypeInfo 1
+
+; CHECK-NEXT: [[TT]]:
+
+define void @test_invoke_ib_42_catch(ptr %fptr) #0 personality ptr @__gxx_personality_v0 {
+  %tmp0 = call ptr @__cxa_allocate_exception(i64 8)
+  store ptr getelementptr inbounds ([6 x i8], ptr @hello_str, i64 0, i64 0), ptr %tmp0, align 8
+  invoke void %fptr(ptr %tmp0, ptr @_ZTIPKc, ptr null) [ "ptrauth"(i32 1, i64 42) ]
+          to label %continuebb unwind label %catchbb
+
+catchbb:
+  %tmp2 = landingpad { ptr, i32 }
+          catch ptr @_ZTIPKc
+          catch ptr null
+  %tmp3 = extractvalue { ptr, i32 } %tmp2, 0
+  %tmp4 = extractvalue { ptr, i32 } %tmp2, 1
+  %tmp5 = call i32 @llvm.eh.typeid.for(ptr @_ZTIPKc)
+  %tmp6 = icmp eq i32 %tmp4, %tmp5
+  %tmp7 = call ptr @__cxa_begin_catch(ptr %tmp3)
+  br i1 %tmp6, label %PKc_catchbb, label %any_catchbb
+
+PKc_catchbb:
+  call void @bar(ptr %tmp7)
+  br label %any_catchbb
+
+any_catchbb:
+  call void @foo()
+  call void @__cxa_end_catch()
+  ret void
+
+continuebb:
+  unreachable
+}
+
+declare void @foo()
+declare void @bar(ptr)
+
+declare i32 @__gxx_personality_v0(...)
+declare ptr @__cxa_allocate_exception(i64)
+declare void @__cxa_throw(ptr, ptr, ptr)
+declare i32 @llvm.eh.typeid.for(ptr)
+declare ptr @__cxa_begin_catch(ptr)
+declare void @__cxa_end_catch()
+
+attributes #0 = { nounwind }

>From a742d682321d5cf1d26446ca1405b6be99052191 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed at bougacha.org>
Date: Thu, 23 May 2024 12:49:09 -0700
Subject: [PATCH 3/5] Address review feedback.

- test invoke for ELF as well
- consolidate GISel/SDAG invoke test
- check key/disc validity in consistent ways
- misc. nits
---
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  24 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |   8 +-
 .../AArch64/GISel/AArch64CallLowering.cpp     |  24 +-
 .../AArch64/GlobalISel/ptrauth-invoke.ll      | 183 ----------
 llvm/test/CodeGen/AArch64/ptrauth-invoke.ll   | 321 +++++++++++-------
 5 files changed, 237 insertions(+), 323 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-invoke.ll

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 22af1f22f1f25..eed2b3114ed93 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1552,18 +1552,20 @@ void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
   unsigned InstsEmitted = 0;
 
   unsigned BrTarget = MI->getOperand(0).getReg();
+
   auto Key = (AArch64PACKey::ID)MI->getOperand(1).getImm();
+  assert((Key == AArch64PACKey::IA || Key == AArch64PACKey::IB) &&
+         "Invalid auth call key");
+
   uint64_t Disc = MI->getOperand(2).getImm();
+  assert(isUInt<16>(Disc));
+
   unsigned AddrDisc = MI->getOperand(3).getReg();
 
   // Compute discriminator into x17
-  assert(isUInt<16>(Disc));
   unsigned DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, InstsEmitted);
   bool IsZeroDisc = DiscReg == AArch64::XZR;
 
-  assert((Key == AArch64PACKey::IA || Key == AArch64PACKey::IB) &&
-         "Invalid auth call key");
-
   unsigned Opc;
   if (Key == AArch64PACKey::IA)
     Opc = IsZeroDisc ? AArch64::BLRAAZ : AArch64::BLRAA;
@@ -1726,8 +1728,12 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
   case AArch64::AUTH_TCRETURN:
   case AArch64::AUTH_TCRETURN_BTI: {
     const uint64_t Key = MI->getOperand(2).getImm();
-    assert(Key < 2 && "Unknown key kind for authenticating tail-call return");
+    assert((Key == AArch64PACKey::IA || Key == AArch64PACKey::IB) &&
+           "Invalid auth key for tail-call return");
+
     const uint64_t Disc = MI->getOperand(3).getImm();
+    assert(isUInt<16>(Disc) && "Integer discriminator is too wide");
+
     Register AddrDisc = MI->getOperand(4).getReg();
 
     Register ScratchReg = MI->getOperand(0).getReg() == AArch64::X16
@@ -1736,8 +1742,6 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
 
     unsigned DiscReg = AddrDisc;
     if (Disc) {
-      assert(isUInt<16>(Disc) && "Integer discriminator is too wide");
-
       if (AddrDisc != AArch64::NoRegister) {
         EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ORRXrs)
                                          .addReg(ScratchReg)
@@ -1758,14 +1762,14 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
       DiscReg = ScratchReg;
     }
 
-    const bool isZero = DiscReg == AArch64::NoRegister;
+    const bool IsZero = DiscReg == AArch64::NoRegister;
     const unsigned Opcodes[2][2] = {{AArch64::BRAA, AArch64::BRAAZ},
                                     {AArch64::BRAB, AArch64::BRABZ}};
 
     MCInst TmpInst;
-    TmpInst.setOpcode(Opcodes[Key][isZero]);
+    TmpInst.setOpcode(Opcodes[Key][IsZero]);
     TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
-    if (!isZero)
+    if (!IsZero)
       TmpInst.addOperand(MCOperand::createReg(DiscReg));
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c5f08e8b1817a..ef34a8539ffa7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8468,8 +8468,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Ops.insert(Ops.begin() + 1, GA);
   } else if (CallConv == CallingConv::ARM64EC_Thunk_X64) {
     Opc = AArch64ISD::CALL_ARM64EC_TO_X64;
-  } else if (GuardWithBTI)
+  } else if (GuardWithBTI) {
     Opc = AArch64ISD::CALL_BTI;
+  }
 
   if (IsTailCall) {
     // Each tail call may have to adjust the stack by a different amount, so
@@ -8480,9 +8481,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   if (CLI.PAI) {
     const uint64_t Key = CLI.PAI->Key;
-    // Authenticated calls only support IA and IB.
-    if (Key > 1)
-      report_fatal_error("Unsupported key kind for authenticating call");
+    assert((Key == AArch64PACKey::IA || Key == AArch64PACKey::IB) &&
+           "Invalid auth call key");
 
     // Split the discriminator into address/integer components.
     SDValue AddrDisc, IntDisc;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 53c35aa835bbc..0bac9d74ed5a8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1021,10 +1021,11 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
 
   if (!IsTailCall) {
     if (!PAI)
-      return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
+      return IsIndirect ? getBLRCallOpcode(CallerF) : AArch64::BL;
 
-    assert(IsIndirect && "authenticated direct call");
-    assert(PAI->Key == 0 || PAI->Key == 1 && "invalid ptrauth key");
+    assert(IsIndirect && "Direct call should not be authenticated");
+    assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) &&
+           "Invalid auth call key");
     return AArch64::BLRA;
   }
 
@@ -1036,11 +1037,12 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
   if (FuncInfo->branchTargetEnforcement()) {
     if (PAI)
       return AArch64::AUTH_TCRETURN_BTI;
-    else if (FuncInfo->branchProtectionPAuthLR())
+    if (FuncInfo->branchProtectionPAuthLR())
       return AArch64::TCRETURNrix17;
-    else
-      return AArch64::TCRETURNrix16x17;
-  } else if (FuncInfo->branchProtectionPAuthLR())
+    return AArch64::TCRETURNrix16x17;
+  }
+
+  if (FuncInfo->branchProtectionPAuthLR())
     return AArch64::TCRETURNrinotx16;
 
   if (PAI)
@@ -1104,7 +1106,9 @@ bool AArch64CallLowering::lowerTailCall(
 
   // Authenticated tail calls always take key/discriminator arguments.
   if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) {
-    assert(Info.PAI->Key == 0 || Info.PAI->Key == 1 && "invalid key");
+    assert((Info.PAI->Key == AArch64PACKey::IA ||
+            Info.PAI->Key == AArch64PACKey::IB) &&
+           "Invalid auth call key");
     MIB.addImm(Info.PAI->Key);
 
     Register AddrDisc = 0;
@@ -1371,7 +1375,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
 
   if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
-    assert(Info.PAI->Key == 0 || Info.PAI->Key == 1 && "invalid key");
+    assert((Info.PAI->Key == AArch64PACKey::IA ||
+            Info.PAI->Key == AArch64PACKey::IB) &&
+           "Invalid auth call key");
     MIB.addImm(Info.PAI->Key);
 
     Register AddrDisc = 0;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-invoke.ll b/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-invoke.ll
deleted file mode 100644
index bd43776687f2e..0000000000000
--- a/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-invoke.ll
+++ /dev/null
@@ -1,183 +0,0 @@
-; RUN: llc -mtriple arm64e-apple-darwin -o - -global-isel -global-isel-abort=1 -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK
-
-; CHECK-LABEL: _test_invoke_ia_0:
-; CHECK-NEXT: [[FNBEGIN:L.*]]:
-; CHECK-NEXT:  .cfi_startproc
-; CHECK-NEXT:  .cfi_personality 155, ___gxx_personality_v0
-; CHECK-NEXT:  .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
-; CHECK-NEXT: ; %bb.0:
-; CHECK-NEXT:  stp x20, x19, [sp, #-32]!
-; CHECK-NEXT:  stp x29, x30, [sp, #16]
-; CHECK-NEXT:  .cfi_def_cfa_offset 32
-; CHECK-NEXT:  .cfi_offset w30, -8
-; CHECK-NEXT:  .cfi_offset w29, -16
-; CHECK-NEXT:  .cfi_offset w19, -24
-; CHECK-NEXT:  .cfi_offset w20, -32
-; CHECK-NEXT: [[PRECALL:L.*]]:
-; CHECK-NEXT:  blraaz x0
-; CHECK-NEXT:  mov x19, x0
-; CHECK-NEXT: [[POSTCALL:L.*]]:
-; CHECK-NEXT: [[POSTCALL_BB:L.*]]:
-; CHECK-NEXT:  bl _foo
-; CHECK-NEXT:  mov x0, x19
-; CHECK-NEXT:  ldp x29, x30, [sp, #16]
-; CHECK-NEXT:  ldp x20, x19, [sp], #32
-; CHECK-NEXT:  ret
-; CHECK-NEXT: [[LPADBB:LBB[0-9_]+]]:
-; CHECK-NEXT: [[LPAD:L.*]]:
-; CHECK-NEXT:  mov w19, #-1
-; CHECK-NEXT:  b [[POSTCALL_BB]]
-
-; CHECK-LABEL: GCC_except_table{{.*}}:
-; CHECK-NEXT: [[EXCEPT]]:
-; CHECK:       .uleb128 [[POSTCALL]]-[[PRECALL]] ;   Call between [[PRECALL]] and [[POSTCALL]]
-; CHECK-NEXT:  .uleb128 [[LPAD]]-[[FNBEGIN]]     ;     jumps to [[LPAD]]
-; CHECK-NEXT:  .byte 0                           ;   On action: cleanup
-
-define i32 @test_invoke_ia_0(ptr %arg0) #0 personality ptr @__gxx_personality_v0 {
-  %tmp0 = invoke i32 %arg0() [ "ptrauth"(i32 0, i64 0) ] to label %continuebb
-            unwind label %unwindbb
-
-unwindbb:
-  %tmp1 = landingpad { ptr, i32 } cleanup
-  call void @foo()
-  ret i32 -1
-
-continuebb:
-  call void @foo()
-  ret i32 %tmp0
-}
-
-@_ZTIPKc = external constant ptr
-@hello_str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
-
-; CHECK-LABEL: _test_invoke_ib_42_catch:
-; CHECK-NEXT: [[FNBEGIN:L.*]]:
-; CHECK-NEXT:         .cfi_startproc
-; CHECK-NEXT:         .cfi_personality 155, ___gxx_personality_v0
-; CHECK-NEXT:         .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
-; CHECK-NEXT: ; %bb.0:
-; CHECK-NEXT:         stp x20, x19, [sp, #-32]!
-; CHECK-NEXT:         stp x29, x30, [sp, #16]
-; CHECK-NEXT:         .cfi_def_cfa_offset 32
-; CHECK-NEXT:         .cfi_offset w30, -8
-; CHECK-NEXT:         .cfi_offset w29, -16
-; CHECK-NEXT:         .cfi_offset w19, -24
-; CHECK-NEXT:         .cfi_offset w20, -32
-; CHECK-NEXT:         mov x19, x0
-; CHECK-NEXT:         mov w0, #8
-; CHECK-NEXT:         bl ___cxa_allocate_exception
-; CHECK-NEXT: Lloh{{.*}}:
-; CHECK-NEXT:         adrp [[STR_TMP:x[0-9]+]], l_hello_str@PAGE
-; CHECK-NEXT: Lloh{{.*}}:
-; CHECK-NEXT:         add [[STR:x[0-9]+]], [[STR_TMP]], l_hello_str@PAGEOFF
-; CHECK-NEXT:         str [[STR]], [x0]
-; CHECK-NEXT: [[PRECALL:L.*]]:
-; CHECK-NEXT: Lloh{{.*}}:
-; CHECK-NEXT:         adrp x[[ZTI_TMP:[0-9]+]], __ZTIPKc@GOTPAGE
-; CHECK-NEXT: Lloh{{.*}}:
-; CHECK-NEXT:         ldr x1, [x[[ZTI_TMP]], __ZTIPKc@GOTPAGEOFF]
-; CHECK-NEXT:         mov x2, #0
-; CHECK-NEXT:         mov x17, #42
-; CHECK-NEXT:         blrab x19, x17
-; CHECK-NEXT: [[POSTCALL:L.*]]:
-; CHECK-NEXT: ; %bb.1:
-; CHECK-NEXT:         brk #0x1
-
-; CHECK-NEXT: [[LPADBB:LBB[0-9_]+]]:
-; CHECK-NEXT: [[LPAD:L.*]]:
-; CHECK-NEXT:         mov x[[TMP:[0-9]+]], x1
-; CHECK-NEXT:         bl ___cxa_begin_catch
-; CHECK-NEXT:         cmp w[[TMP]], #2
-
-; CHECK-NEXT:         b.ne [[EXITBB:LBB[0-9_]+]]
-; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT:         bl _bar
-; CHECK-NEXT: [[EXITBB]]:
-; CHECK-NEXT:         bl _foo
-; CHECK-NEXT:         bl ___cxa_end_catch
-; CHECK-NEXT:         ldp x29, x30, [sp, #16]
-; CHECK-NEXT:         ldp x20, x19, [sp], #32     ; 16-byte Folded Reload
-; CHECK-NEXT:         ret
-; CHECK-NEXT:         .loh {{.*}}
-; CHECK-NEXT:         .loh {{.*}}
-; CHECK-NEXT: [[FNEND:L.*]]:
-
-; CHECK-LABEL: GCC_except_table{{.*}}:
-; CHECK-NEXT: [[EXCEPT]]:
-; CHECK-NEXT:         .byte   255                     ; @LPStart Encoding = omit
-; CHECK-NEXT:         .byte   155                     ; @TType Encoding = indirect pcrel sdata4
-; CHECK-NEXT:         .uleb128 [[TT:L.*]]-[[TTREF:L.*]]
-; CHECK-NEXT: [[TTREF]]:
-; CHECK-NEXT:         .byte   1                       ; Call site Encoding = uleb128
-; CHECK-NEXT:         .uleb128 [[CSEND:L.*]]-[[CSBEGIN:L.*]]
-; CHECK-NEXT: [[CSBEGIN]]:
-; CHECK-NEXT:         .uleb128 [[FNBEGIN]]-[[FNBEGIN]]  ; >> Call Site 1 <<
-; CHECK-NEXT:         .uleb128 [[PRECALL]]-[[FNBEGIN]]  ;   Call between [[FNBEGIN]] and [[PRECALL]]
-; CHECK-NEXT:         .byte   0                         ;     has no landing pad
-; CHECK-NEXT:         .byte   0                         ;   On action: cleanup
-; CHECK-NEXT:         .uleb128 [[PRECALL]]-[[FNBEGIN]]  ; >> Call Site 2 <<
-; CHECK-NEXT:         .uleb128 [[POSTCALL]]-[[PRECALL]] ;   Call between [[PRECALL]] and [[POSTCALL]]
-; CHECK-NEXT:         .uleb128 [[LPAD]]-[[FNBEGIN]]     ;     jumps to [[LPAD]]
-; CHECK-NEXT:         .byte   3                         ;   On action: 2
-; CHECK-NEXT:         .uleb128 [[POSTCALL]]-[[FNBEGIN]] ; >> Call Site 3 <<
-; CHECK-NEXT:         .uleb128 [[FNEND]]-[[POSTCALL]]   ;   Call between [[POSTCALL]] and [[FNEND]]
-; CHECK-NEXT:         .byte   0                         ;     has no landing pad
-; CHECK-NEXT:         .byte   0                         ;   On action: cleanup
-; CHECK-NEXT: [[CSEND]]:
-
-; CHECK-NEXT:          .byte   1                       ; >> Action Record 1 <<
-; CHECK-NEXT:                                          ;   Catch TypeInfo 1
-; CHECK-NEXT:          .byte   0                       ;   No further actions
-; CHECK-NEXT:          .byte   2                       ; >> Action Record 2 <<
-; CHECK-NEXT:                                          ;   Catch TypeInfo 2
-; CHECK-NEXT:          .byte   125                     ;   Continue to action 1
-; CHECK-NEXT:          .p2align   2
-; CHECK-NEXT:                                          ; >> Catch TypeInfos <<
-; CHECK-NEXT: [[TI:L.*]]:                              ; TypeInfo 2
-; CHECK-NEXT:          .long   __ZTIPKc@GOT-[[TI]]
-; CHECK-NEXT:          .long   0                       ; TypeInfo 1
-
-; CHECK-NEXT: [[TT]]:
-
-define void @test_invoke_ib_42_catch(ptr %fptr) #0 personality ptr @__gxx_personality_v0 {
-  %tmp0 = call ptr @__cxa_allocate_exception(i64 8)
-  store ptr getelementptr inbounds ([6 x i8], ptr @hello_str, i64 0, i64 0), ptr %tmp0, align 8
-  invoke void %fptr(ptr %tmp0, ptr @_ZTIPKc, ptr null) [ "ptrauth"(i32 1, i64 42) ]
-          to label %continuebb unwind label %catchbb
-
-catchbb:
-  %tmp2 = landingpad { ptr, i32 }
-          catch ptr @_ZTIPKc
-          catch ptr null
-  %tmp3 = extractvalue { ptr, i32 } %tmp2, 0
-  %tmp4 = extractvalue { ptr, i32 } %tmp2, 1
-  %tmp5 = call i32 @llvm.eh.typeid.for(ptr @_ZTIPKc)
-  %tmp6 = icmp eq i32 %tmp4, %tmp5
-  %tmp7 = call ptr @__cxa_begin_catch(ptr %tmp3)
-  br i1 %tmp6, label %PKc_catchbb, label %any_catchbb
-
-PKc_catchbb:
-  call void @bar(ptr %tmp7)
-  br label %any_catchbb
-
-any_catchbb:
-  call void @foo()
-  call void @__cxa_end_catch()
-  ret void
-
-continuebb:
-  unreachable
-}
-
-declare void @foo()
-declare void @bar(ptr)
-
-declare i32 @__gxx_personality_v0(...)
-declare ptr @__cxa_allocate_exception(i64)
-declare void @__cxa_throw(ptr, ptr, ptr)
-declare i32 @llvm.eh.typeid.for(ptr)
-declare ptr @__cxa_begin_catch(ptr)
-declare void @__cxa_end_catch()
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-invoke.ll b/llvm/test/CodeGen/AArch64/ptrauth-invoke.ll
index bea699dbee37a..fcd0ddb788336 100644
--- a/llvm/test/CodeGen/AArch64/ptrauth-invoke.ll
+++ b/llvm/test/CodeGen/AArch64/ptrauth-invoke.ll
@@ -1,40 +1,86 @@
-; RUN: llc -mtriple arm64e-apple-darwin -o - %s | FileCheck %s --check-prefixes=CHECK,SDAG
-; RUN: llc -mtriple arm64e-apple-darwin -fast-isel -o - %s | FileCheck %s --check-prefixes=CHECK,FISEL
-
-; CHECK-LABEL: _test_invoke_ia_0:
-; CHECK-NEXT: [[FNBEGIN:L.*]]:
-; CHECK-NEXT:  .cfi_startproc
-; CHECK-NEXT:  .cfi_personality 155, ___gxx_personality_v0
-; CHECK-NEXT:  .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
-; CHECK-NEXT: ; %bb.0:
-; CHECK-NEXT:  stp x20, x19, [sp, #-32]!
-; CHECK-NEXT:  stp x29, x30, [sp, #16]
-; CHECK-NEXT:  .cfi_def_cfa_offset 32
-; CHECK-NEXT:  .cfi_offset w30, -8
-; CHECK-NEXT:  .cfi_offset w29, -16
-; CHECK-NEXT:  .cfi_offset w19, -24
-; CHECK-NEXT:  .cfi_offset w20, -32
-; CHECK-NEXT: [[PRECALL:L.*]]:
-; CHECK-NEXT:  blraaz x0
-; CHECK-NEXT: [[POSTCALL:L.*]]:
-; CHECK-NEXT: ; %bb.1:
-; CHECK-NEXT:  mov x19, x0
-; CHECK-NEXT: [[CALLBB:L.*]]:
-; CHECK-NEXT:  bl _foo
-; CHECK-NEXT:  mov x0, x19
-; CHECK-NEXT:  ldp x29, x30, [sp, #16]
-; CHECK-NEXT:  ldp x20, x19, [sp], #32
-; CHECK-NEXT:  ret
-; CHECK-NEXT: [[LPADBB:LBB[0-9_]+]]:
-; CHECK-NEXT: [[LPAD:L.*]]:
-; CHECK-NEXT:  mov w19, #-1
-; CHECK-NEXT:  b [[CALLBB]]
+; RUN: llc -mtriple arm64e-apple-darwin   -o - %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,DARWIN,DARWIN-SDAG
+
+; RUN: llc -mtriple aarch64 -mattr=+pauth -o - %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,ELF,ELF-SDAG
+
+; RUN: llc -mtriple arm64e-apple-darwin   -o - %s \
+; RUN:   -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:  | FileCheck %s --check-prefixes=CHECK,DARWIN,DARWIN-GISEL
+
+; RUN: llc -mtriple aarch64 -mattr=+pauth -o - %s \
+; RUN:   -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:  | FileCheck %s --check-prefixes=CHECK,ELF,ELF-GISEL
+
+; DARWIN-LABEL: _test_invoke_ia_0:
+; DARWIN-NEXT: [[FNBEGIN:L.*]]:
+; DARWIN-NEXT:  .cfi_startproc
+; DARWIN-NEXT:  .cfi_personality 155, ___gxx_personality_v0
+; DARWIN-NEXT:  .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
+; DARWIN-NEXT: ; %bb.0:
+; DARWIN-NEXT:  stp x20, x19, [sp, #-32]!
+; DARWIN-NEXT:  stp x29, x30, [sp, #16]
+; DARWIN-NEXT:  .cfi_def_cfa_offset 32
+; DARWIN-NEXT:  .cfi_offset w30, -8
+; DARWIN-NEXT:  .cfi_offset w29, -16
+; DARWIN-NEXT:  .cfi_offset w19, -24
+; DARWIN-NEXT:  .cfi_offset w20, -32
+; DARWIN-NEXT: [[PRECALL:L.*]]:
+; DARWIN-NEXT:  blraaz x0
+
+; DARWIN-SDAG-NEXT: [[POSTCALL:L.*]]:
+; DARWIN-SDAG-NEXT: ; %bb.1:
+; DARWIN-SDAG-NEXT:  mov x19, x0
+
+; DARWIN-GISEL-NEXT:  mov x19, x0
+; DARWIN-GISEL-NEXT: [[POSTCALL:L.*]]:
+
+; DARWIN-NEXT: [[CALLBB:L.*]]:
+; DARWIN-NEXT:  bl _foo
+; DARWIN-NEXT:  mov x0, x19
+; DARWIN-NEXT:  ldp x29, x30, [sp, #16]
+; DARWIN-NEXT:  ldp x20, x19, [sp], #32
+; DARWIN-NEXT:  ret
+; DARWIN-NEXT: [[LPADBB:LBB[0-9_]+]]:
+; DARWIN-NEXT: [[LPAD:L.*]]:
+; DARWIN-NEXT:  mov w19, #-1
+; DARWIN-NEXT:  b [[CALLBB]]
+
+; ELF-LABEL: test_invoke_ia_0:
+; ELF-NEXT: [[FNBEGIN:.L.*]]:
+; ELF-NEXT:  .cfi_startproc
+; ELF-NEXT:  .cfi_personality 156, DW.ref.__gxx_personality_v0
+; ELF-NEXT:  .cfi_lsda 28, [[EXCEPT:.Lexception[0-9]+]]
+; ELF-NEXT: // %bb.0:
+; ELF-NEXT:  stp x30, x19, [sp, #-16]!
+; ELF-NEXT:  .cfi_def_cfa_offset 16
+; ELF-NEXT:  .cfi_offset w19, -8
+; ELF-NEXT:  .cfi_offset w30, -16
+; ELF-NEXT: [[PRECALL:.L.*]]:
+; ELF-NEXT:  blraaz x0
+
+; ELF-SDAG-NEXT: [[POSTCALL:.L.*]]:
+; ELF-SDAG-NEXT: // %bb.1:
+; ELF-SDAG-NEXT:  mov w19, w0
+
+; ELF-GISEL-NEXT:  mov w19, w0
+; ELF-GISEL-NEXT: [[POSTCALL:.L.*]]:
+
+; ELF-NEXT: [[CALLBB:.L.*]]:
+; ELF-NEXT:  bl foo
+; ELF-NEXT:  mov w0, w19
+; ELF-NEXT:  ldp x30, x19, [sp], #16
+; ELF-NEXT:  ret
+; ELF-NEXT: [[LPADBB:.LBB[0-9_]+]]:
+; ELF-NEXT: [[LPAD:.L.*]]:
+; ELF-NEXT:  mov w19, #-1
+; ELF-NEXT:  b [[CALLBB]]
 
 ; CHECK-LABEL: GCC_except_table{{.*}}:
 ; CHECK-NEXT: [[EXCEPT]]:
-; CHECK:       .uleb128 [[POSTCALL]]-[[PRECALL]] ;   Call between [[PRECALL]] and [[POSTCALL]]
-; CHECK-NEXT:  .uleb128 [[LPAD]]-[[FNBEGIN]]     ;     jumps to [[LPAD]]
-; CHECK-NEXT:  .byte 0                           ;   On action: cleanup
+; CHECK:       .uleb128 [[POSTCALL]]-[[PRECALL]] {{.*}} Call between [[PRECALL]] and [[POSTCALL]]
+; CHECK-NEXT:  .uleb128 [[LPAD]]-[[FNBEGIN]]     {{.*}}   jumps to [[LPAD]]
+; CHECK-NEXT:  .byte 0                           {{.*}} On action: cleanup
 
 define i32 @test_invoke_ia_0(ptr %arg0) #0 personality ptr @__gxx_personality_v0 {
   %tmp0 = invoke i32 %arg0() [ "ptrauth"(i32 0, i64 0) ] to label %continuebb
@@ -53,99 +99,140 @@ continuebb:
 @_ZTIPKc = external constant ptr
 @hello_str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
 
-; CHECK-LABEL: _test_invoke_ib_42_catch:
-; CHECK-NEXT: [[FNBEGIN:L.*]]:
-; CHECK-NEXT:         .cfi_startproc
-; CHECK-NEXT:         .cfi_personality 155, ___gxx_personality_v0
-; CHECK-NEXT:         .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
-; CHECK-NEXT: ; %bb.0:
-; CHECK-NEXT:         stp x20, x19, [sp, #-32]!
-; CHECK-NEXT:         stp x29, x30, [sp, #16]
-; CHECK-NEXT:         .cfi_def_cfa_offset 32
-; CHECK-NEXT:         .cfi_offset w30, -8
-; CHECK-NEXT:         .cfi_offset w29, -16
-; CHECK-NEXT:         .cfi_offset w19, -24
-; CHECK-NEXT:         .cfi_offset w20, -32
-; CHECK-NEXT:         mov x19, x0
-; CHECK-NEXT:         mov w0, #8
-; CHECK-NEXT:         bl ___cxa_allocate_exception
-; CHECK-NEXT: Lloh{{.*}}:
-; CHECK-NEXT:         adrp x8, l_hello_str@PAGE
-; CHECK-NEXT: Lloh{{.*}}:
-; CHECK-NEXT:         add x8, x8, l_hello_str@PAGEOFF
-; CHECK-NEXT:         str x8, [x0]
-; CHECK-NEXT: [[PRECALL:L.*]]:
-; CHECK-NEXT: Lloh{{.*}}:
-; CHECK-NEXT:         adrp x1, __ZTIPKc@GOTPAGE
-; CHECK-NEXT: Lloh{{.*}}:
-; CHECK-NEXT:         ldr x1, [x1, __ZTIPKc@GOTPAGEOFF]
-; CHECK-NEXT:         mov x2, #0
-; CHECK-NEXT:         mov x17, #42
-; CHECK-NEXT:         blrab x19, x17
-; CHECK-NEXT: [[POSTCALL:L.*]]:
-; CHECK-NEXT: ; %bb.1:
-; CHECK-NEXT:         brk #0x1
-; CHECK-NEXT: [[LPADBB:LBB[0-9_]+]]:
-; CHECK-NEXT: [[LPAD:L.*]]:
-; CHECK-NEXT:         mov x19, x1
-
-; SDAG-NEXT:          bl ___cxa_begin_catch
-; SDAG-NEXT:          cmp     w19, #2
-
-; FISEL-NEXT:         mov w20, #2
-; FISEL-NEXT:         bl ___cxa_begin_catch
-; FISEL-NEXT:         cmp w19, w20
-
-; CHECK-NEXT:         b.ne [[EXITBB:LBB[0-9_]+]]
-; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT:         bl _bar
-; CHECK-NEXT: [[EXITBB]]:
-; CHECK-NEXT:         bl _foo
-; CHECK-NEXT:         bl ___cxa_end_catch
-; CHECK-NEXT:         ldp x29, x30, [sp, #16]
-; CHECK-NEXT:         ldp x20, x19, [sp], #32
-; CHECK-NEXT:         ret
-; CHECK-NEXT:         .loh {{.*}}
-; CHECK-NEXT:         .loh {{.*}}
-; CHECK-NEXT: [[FNEND:L.*]]:
+; DARWIN-LABEL: _test_invoke_ib_42_catch:
+; DARWIN-NEXT: [[FNBEGIN:L.*]]:
+; DARWIN-NEXT:         .cfi_startproc
+; DARWIN-NEXT:         .cfi_personality 155, ___gxx_personality_v0
+; DARWIN-NEXT:         .cfi_lsda 16, [[EXCEPT:Lexception[0-9]+]]
+; DARWIN-NEXT: ; %bb.0:
+; DARWIN-NEXT:         stp x20, x19, [sp, #-32]!
+; DARWIN-NEXT:         stp x29, x30, [sp, #16]
+; DARWIN-NEXT:         .cfi_def_cfa_offset 32
+; DARWIN-NEXT:         .cfi_offset w30, -8
+; DARWIN-NEXT:         .cfi_offset w29, -16
+; DARWIN-NEXT:         .cfi_offset w19, -24
+; DARWIN-NEXT:         .cfi_offset w20, -32
+; DARWIN-NEXT:         mov x19, x0
+; DARWIN-NEXT:         mov w0, #8
+; DARWIN-NEXT:         bl ___cxa_allocate_exception
+; DARWIN-NEXT: Lloh{{.*}}:
+; DARWIN-NEXT:         adrp x8, l_hello_str@PAGE
+; DARWIN-NEXT: Lloh{{.*}}:
+; DARWIN-NEXT:         add x8, x8, l_hello_str@PAGEOFF
+; DARWIN-NEXT:         str x8, [x0]
+; DARWIN-NEXT: [[PRECALL:L.*]]:
+; DARWIN-NEXT: Lloh{{.*}}:
+; DARWIN-NEXT:         adrp x1, __ZTIPKc@GOTPAGE
+; DARWIN-NEXT: Lloh{{.*}}:
+; DARWIN-NEXT:         ldr x1, [x1, __ZTIPKc@GOTPAGEOFF]
+; DARWIN-NEXT:         mov x2, #0
+; DARWIN-NEXT:         mov x17, #42
+; DARWIN-NEXT:         blrab x19, x17
+; DARWIN-NEXT: [[POSTCALL:L.*]]:
+; DARWIN-NEXT: ; %bb.1:
+; DARWIN-NEXT:         brk #0x1
+; DARWIN-NEXT: [[LPADBB:LBB[0-9_]+]]:
+; DARWIN-NEXT: [[LPAD:L.*]]:
+; DARWIN-NEXT:         mov x19, x1
+; DARWIN-NEXT:         bl ___cxa_begin_catch
+; DARWIN-NEXT:         cmp     w19, #2
+; DARWIN-NEXT:         b.ne [[EXITBB:LBB[0-9_]+]]
+; DARWIN-NEXT: ; %bb.3:
+; DARWIN-NEXT:         bl _bar
+; DARWIN-NEXT: [[EXITBB]]:
+; DARWIN-NEXT:         bl _foo
+; DARWIN-NEXT:         bl ___cxa_end_catch
+; DARWIN-NEXT:         ldp x29, x30, [sp, #16]
+; DARWIN-NEXT:         ldp x20, x19, [sp], #32
+; DARWIN-NEXT:         ret
+; DARWIN-NEXT:         .loh {{.*}}
+; DARWIN-NEXT:         .loh {{.*}}
+; DARWIN-NEXT: [[FNEND:L.*]]:
+
+; ELF-LABEL: test_invoke_ib_42_catch:
+; ELF-NEXT: [[FNBEGIN:.L.*]]:
+; ELF-NEXT:         .cfi_startproc
+; ELF-NEXT:         .cfi_personality 156, DW.ref.__gxx_personality_v0
+; ELF-NEXT:         .cfi_lsda 28, [[EXCEPT:.Lexception[0-9]+]]
+; ELF-NEXT: // %bb.0:
+; ELF-NEXT:         stp x30, x19, [sp, #-16]!
+; ELF-NEXT:         .cfi_def_cfa_offset 16
+; ELF-NEXT:         .cfi_offset w19, -8
+; ELF-NEXT:         .cfi_offset w30, -16
+; ELF-NEXT:         mov x19, x0
+; ELF-NEXT:         mov w0, #8
+; ELF-NEXT:         bl __cxa_allocate_exception
+; ELF-NEXT:         adrp x8, .Lhello_str
+; ELF-NEXT:         add x8, x8, :lo12:.Lhello_str
+; ELF-NEXT:         str x8, [x0]
+; ELF-NEXT: [[PRECALL:.L.*]]:
+; ELF-NEXT:         adrp x1, :got:_ZTIPKc
+; ELF-NEXT:         mov x2, xzr
+; ELF-NEXT:         ldr x1, [x1, :got_lo12:_ZTIPKc]
+; ELF-NEXT:         mov x17, #42
+; ELF-NEXT:         blrab x19, x17
+; ELF-NEXT: [[POSTCALL:.L.*]]:
+; ELF-NEXT: // %bb.1:
+; ELF-NEXT: [[LPADBB:.LBB[0-9_]+]]:
+; ELF-NEXT: [[LPAD:.L.*]]:
+; ELF-NEXT:         mov x19, x1
+; ELF-NEXT:         bl __cxa_begin_catch
+; ELF-NEXT:         cmp     w19, #2
+; ELF-NEXT:         b.ne [[EXITBB:.LBB[0-9_]+]]
+; ELF-NEXT: // %bb.3:
+; ELF-NEXT:         bl bar
+; ELF-NEXT: [[EXITBB]]:
+; ELF-NEXT:         bl foo
+; ELF-NEXT:         bl __cxa_end_catch
+; ELF-NEXT:         ldp x30, x19, [sp], #16
+; ELF-NEXT:         ret
+; ELF-NEXT: [[FNEND:.L.*]]:
 
 ; CHECK-LABEL: GCC_except_table{{.*}}:
 ; CHECK-NEXT: [[EXCEPT]]:
-; CHECK-NEXT:         .byte   255                     ; @LPStart Encoding = omit
-; CHECK-NEXT:         .byte   155                     ; @TType Encoding = indirect pcrel sdata4
-; CHECK-NEXT:         .uleb128 [[TT:L.*]]-[[TTREF:L.*]]
+; CHECK-NEXT:   .byte   255                       {{.*}} @LPStart Encoding = omit
+; DARWIN-NEXT:  .byte   155                       {{.*}} @TType Encoding = indirect pcrel sdata4
+; ELF-NEXT:     .byte   156                       {{.*}} @TType Encoding = indirect pcrel sdata8
+; CHECK-NEXT:   .uleb128 [[TT:.?L.*]]-[[TTREF:.?L.*]]
 ; CHECK-NEXT: [[TTREF]]:
-; CHECK-NEXT:         .byte   1                       ; Call site Encoding = uleb128
-; CHECK-NEXT:         .uleb128 [[CSEND:L.*]]-[[CSBEGIN:L.*]]
+; CHECK-NEXT:   .byte   1                         {{.*}} Call site Encoding = uleb128
+; CHECK-NEXT:   .uleb128 [[CSEND:.?L.*]]-[[CSBEGIN:.?L.*]]
 ; CHECK-NEXT: [[CSBEGIN]]:
-; CHECK-NEXT:         .uleb128 [[FNBEGIN]]-[[FNBEGIN]]  ; >> Call Site 1 <<
-; CHECK-NEXT:         .uleb128 [[PRECALL]]-[[FNBEGIN]]  ;   Call between [[FNBEGIN]] and [[PRECALL]]
-; CHECK-NEXT:         .byte   0                         ;     has no landing pad
-; CHECK-NEXT:         .byte   0                         ;   On action: cleanup
-; CHECK-NEXT:         .uleb128 [[PRECALL]]-[[FNBEGIN]]  ; >> Call Site 2 <<
-; CHECK-NEXT:         .uleb128 [[POSTCALL]]-[[PRECALL]] ;   Call between [[PRECALL]] and [[POSTCALL]]
-; CHECK-NEXT:         .uleb128 [[LPAD]]-[[FNBEGIN]]     ;     jumps to [[LPAD]]
-; CHECK-NEXT:         .byte   3                         ;   On action: 2
-; CHECK-NEXT:         .uleb128 [[POSTCALL]]-[[FNBEGIN]] ; >> Call Site 3 <<
-; CHECK-NEXT:         .uleb128 [[FNEND]]-[[POSTCALL]]   ;   Call between [[POSTCALL]] and [[FNEND]]
-; CHECK-NEXT:         .byte   0                         ;     has no landing pad
-; CHECK-NEXT:         .byte   0                         ;   On action: cleanup
+; CHECK-NEXT:   .uleb128 [[FNBEGIN]]-[[FNBEGIN]]  {{.*}} >> Call Site 1 <<
+; CHECK-NEXT:   .uleb128 [[PRECALL]]-[[FNBEGIN]]  {{.*}}   Call between [[FNBEGIN]] and [[PRECALL]]
+; CHECK-NEXT:   .byte   0                         {{.*}}     has no landing pad
+; CHECK-NEXT:   .byte   0                         {{.*}}   On action: cleanup
+; CHECK-NEXT:   .uleb128 [[PRECALL]]-[[FNBEGIN]]  {{.*}} >> Call Site 2 <<
+; CHECK-NEXT:   .uleb128 [[POSTCALL]]-[[PRECALL]] {{.*}}   Call between [[PRECALL]] and [[POSTCALL]]
+; CHECK-NEXT:   .uleb128 [[LPAD]]-[[FNBEGIN]]     {{.*}}     jumps to [[LPAD]]
+; CHECK-NEXT:   .byte   3                         {{.*}}   On action: 2
+; CHECK-NEXT:   .uleb128 [[POSTCALL]]-[[FNBEGIN]] {{.*}} >> Call Site 3 <<
+; CHECK-NEXT:   .uleb128 [[FNEND]]-[[POSTCALL]]   {{.*}}   Call between [[POSTCALL]] and [[FNEND]]
+; CHECK-NEXT:   .byte   0                         {{.*}}     has no landing pad
+; CHECK-NEXT:   .byte   0                         {{.*}}   On action: cleanup
 ; CHECK-NEXT: [[CSEND]]:
 
-; CHECK-NEXT:          .byte   1                       ; >> Action Record 1 <<
-; CHECK-NEXT:                                          ;   Catch TypeInfo 1
-; CHECK-NEXT:          .byte   0                       ;   No further actions
-; CHECK-NEXT:          .byte   2                       ; >> Action Record 2 <<
-; CHECK-NEXT:                                          ;   Catch TypeInfo 2
-; CHECK-NEXT:          .byte   125                     ;   Continue to action 1
-; CHECK-NEXT:          .p2align   2
-; CHECK-NEXT:                                          ; >> Catch TypeInfos <<
-; CHECK-NEXT: [[TI:L.*]]:                              ; TypeInfo 2
-; CHECK-NEXT:          .long   __ZTIPKc@GOT-[[TI]]
-; CHECK-NEXT:          .long   0                       ; TypeInfo 1
+; CHECK-NEXT:   .byte   1                         {{.*}} >> Action Record 1 <<
+; CHECK-NEXT:                                     {{.*}}   Catch TypeInfo 1
+; CHECK-NEXT:   .byte   0                         {{.*}}   No further actions
+; CHECK-NEXT:   .byte   2                         {{.*}} >> Action Record 2 <<
+; CHECK-NEXT:                                     {{.*}}   Catch TypeInfo 2
+; CHECK-NEXT:   .byte   125                       {{.*}}   Continue to action 1
+; CHECK-NEXT:   .p2align   2
+; CHECK-NEXT:                                     {{.*}} >> Catch TypeInfos <<
+
+; DARWIN-NEXT: [[TI:L.*]]:                        {{.*}} TypeInfo 2
+; DARWIN-NEXT:  .long   __ZTIPKc@GOT-[[TI]]
+; DARWIN-NEXT:  .long   0                         {{.*}} TypeInfo 1
+; ELF-NEXT:    [[TI:.?L.*]]:                      {{.*}} TypeInfo 2
+; ELF-NEXT:     .xword  .L_ZTIPKc.DW.stub-[[TI]]
+; ELF-NEXT:     .xword   0                        {{.*}} TypeInfo 1
 
 ; CHECK-NEXT: [[TT]]:
 
+; ELF-LABEL:  .L_ZTIPKc.DW.stub:
+; ELF-NEXT:     .xword  _ZTIPKc
+
 define void @test_invoke_ib_42_catch(ptr %fptr) #0 personality ptr @__gxx_personality_v0 {
   %tmp0 = call ptr @__cxa_allocate_exception(i64 8)
   store ptr getelementptr inbounds ([6 x i8], ptr @hello_str, i64 0, i64 0), ptr %tmp0, align 8

>From 1714447c5a08d7bbe4fd2db17fa07ebf6169090a Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed at bougacha.org>
Date: Tue, 28 May 2024 21:26:02 -0700
Subject: [PATCH 4/5] Address review feedback.

- AArch64ExpandPseudos: generalize & use createCall
- AArch64InstrInfo: describe x16/x17 usage in pseudo
- AArch64InstrInfo: group SDNodes with others
- test call for ELF as well
- GlobalISel: rename PointerAuthInfo, reorder fields
- various nits, auto, const
---
 .../llvm/CodeGen/GlobalISel/CallLowering.h    |  10 +-
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  |   2 +-
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |  10 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   6 +-
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |   1 -
 .../AArch64/AArch64ExpandPseudoInsts.cpp      |  74 +++---
 .../Target/AArch64/AArch64ISelLowering.cpp    |   2 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  57 ++--
 .../AArch64/GISel/AArch64CallLowering.cpp     |   2 +-
 llvm/test/CodeGen/AArch64/ptrauth-call.ll     | 249 ++++++++++++------
 10 files changed, 245 insertions(+), 168 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index fb298898304eb..bb3be3e2b4f98 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -99,9 +99,9 @@ class CallLowering {
     ArgInfo() = default;
   };
 
-  struct PointerAuthInfo {
-    Register Discriminator;
+  struct PtrAuthInfo {
     uint64_t Key;
+    Register Discriminator;
   };
 
   struct CallLoweringInfo {
@@ -130,7 +130,8 @@ class CallLowering {
 
     MDNode *KnownCallees = nullptr;
 
-    std::optional<PointerAuthInfo> PAI;
+    /// The auth-call information in the "ptrauth" bundle, if present.
+    std::optional<PtrAuthInfo> PAI;
 
     /// True if the call must be tail call optimized.
     bool IsMustTailCall = false;
@@ -594,8 +595,7 @@ class CallLowering {
   bool lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &Call,
                  ArrayRef<Register> ResRegs,
                  ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg,
-                 std::optional<PointerAuthInfo> PAI,
-                 Register ConvergenceCtrlToken,
+                 std::optional<PtrAuthInfo> PAI, Register ConvergenceCtrlToken,
                  std::function<unsigned()> GetCalleeReg) const;
 
   /// For targets which want to use big-endian can enable it with
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 740a00d8afdd4..412cd0a21ad41 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -92,7 +92,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
                              ArrayRef<Register> ResRegs,
                              ArrayRef<ArrayRef<Register>> ArgRegs,
                              Register SwiftErrorVReg,
-                             std::optional<PointerAuthInfo> PAI,
+                             std::optional<PtrAuthInfo> PAI,
                              Register ConvergenceCtrlToken,
                              std::function<unsigned()> GetCalleeReg) const {
   CallLoweringInfo Info;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 299f16dbddf12..3e4a4c66d7fc6 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2642,18 +2642,18 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
     }
   }
 
-  std::optional<CallLowering::PointerAuthInfo> PAI;
+  std::optional<CallLowering::PtrAuthInfo> PAI;
   if (CB.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
     // Functions should never be ptrauth-called directly.
     assert(!CB.getCalledFunction() && "invalid direct ptrauth call");
 
     auto PAB = CB.getOperandBundle("ptrauth");
-    Value *Key = PAB->Inputs[0];
-    Value *Discriminator = PAB->Inputs[1];
+    const Value *Key = PAB->Inputs[0];
+    const Value *Discriminator = PAB->Inputs[1];
 
     Register DiscReg = getOrCreateVReg(*Discriminator);
-    PAI = CallLowering::PointerAuthInfo{DiscReg,
-                                        cast<ConstantInt>(Key)->getZExtValue()};
+    PAI = CallLowering::PtrAuthInfo{cast<ConstantInt>(Key)->getZExtValue(),
+                                    DiscReg};
   }
 
   Register ConvergenceCtrlToken = 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1de98e5559ee8..692b913ee5381 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9225,12 +9225,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
 void SelectionDAGBuilder::LowerCallSiteWithPtrAuthBundle(
     const CallBase &CB, const BasicBlock *EHPadBB) {
   auto PAB = CB.getOperandBundle("ptrauth");
-  auto *CalleeV = CB.getCalledOperand();
+  const Value *CalleeV = CB.getCalledOperand();
 
   // Gather the call ptrauth data from the operand bundle:
   //   [ i32 <key>, i64 <discriminator> ]
-  auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
-  Value *Discriminator = PAB->Inputs[1];
+  const auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
+  const Value *Discriminator = PAB->Inputs[1];
 
   assert(Key->getType()->isIntegerTy(32) && "Invalid ptrauth key");
   assert(Discriminator->getType()->isIntegerTy(64) &&
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index eed2b3114ed93..6ff9d29912dd0 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1550,7 +1550,6 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
 
 void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
   unsigned InstsEmitted = 0;
-
   unsigned BrTarget = MI->getOperand(0).getReg();
 
   auto Key = (AArch64PACKey::ID)MI->getOperand(1).getImm();
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 657324d2307c5..9dc3fe661df8e 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -774,26 +774,24 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
   return true;
 }
 
-// Create a call to CallTarget, copying over all the operands from *MBBI,
-// starting at the regmask.
-static MachineInstr *createCall(MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MBBI,
-                                const AArch64InstrInfo *TII,
-                                MachineOperand &CallTarget,
-                                unsigned RegMaskStartIdx) {
-  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
-  MachineInstr *Call =
-      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opc)).getInstr();
-
-  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
-         "invalid operand for regular call");
-  Call->addOperand(CallTarget);
+// Create a call with the passed opcode and explicit operands, copying over all
+// the implicit operands from *MBBI, starting at the regmask.
+static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI,
+                                       const AArch64InstrInfo *TII,
+                                       unsigned Opcode,
+                                       ArrayRef<MachineOperand> ExplicitOps,
+                                       unsigned RegMaskStartIdx) {
+  // Build the MI, with explicit operands first (including the call target).
+  MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
+                           .add(ExplicitOps)
+                           .getInstr();
 
   // Register arguments are added during ISel, but cannot be added as explicit
   // operands of the branch as it expects to be B <target> which is only one
   // operand. Instead they are implicit operands used by the branch.
   while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
-    auto MOP = MBBI->getOperand(RegMaskStartIdx);
+    MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
     assert(MOP.isReg() && "can only add register operands");
     Call->addOperand(MachineOperand::CreateReg(
         MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
@@ -807,6 +805,20 @@ static MachineInstr *createCall(MachineBasicBlock &MBB,
   return Call;
 }
 
+// Create a call to CallTarget, copying over all the operands from *MBBI,
+// starting at the regmask.
+static MachineInstr *createCall(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                const AArch64InstrInfo *TII,
+                                MachineOperand &CallTarget,
+                                unsigned RegMaskStartIdx) {
+  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
+
+  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
+         "invalid operand for regular call");
+  return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
+}
+
 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
   // Expand CALL_RVMARKER pseudo to:
@@ -822,33 +834,19 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
 
   if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
     // Pointer auth call.
+    MachineOperand &CallTarget = MI.getOperand(1);
     MachineOperand &Key = MI.getOperand(2);
-    assert((Key.getImm() == 0 || Key.getImm() == 1) &&
-           "invalid key for ptrauth call");
     MachineOperand &IntDisc = MI.getOperand(3);
     MachineOperand &AddrDisc = MI.getOperand(4);
 
-    OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BLRA))
-                       .getInstr();
-    OriginalCall->addOperand(MI.getOperand(1));
-    OriginalCall->addOperand(Key);
-    OriginalCall->addOperand(IntDisc);
-    OriginalCall->addOperand(AddrDisc);
-
-    unsigned RegMaskStartIdx = 5;
-    // Skip register arguments. Those are added during ISel, but are not
-    // needed for the concrete branch.
-    while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
-      auto MOP = MI.getOperand(RegMaskStartIdx);
-      assert(MOP.isReg() && "can only add register operands");
-      OriginalCall->addOperand(MachineOperand::CreateReg(
-          MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
-          /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
-      RegMaskStartIdx++;
-    }
-    for (const MachineOperand &MO :
-         llvm::drop_begin(MI.operands(), RegMaskStartIdx))
-      OriginalCall->addOperand(MO);
+    assert((Key.getImm() == AArch64PACKey::IA ||
+            Key.getImm() == AArch64PACKey::IB) &&
+           "Invalid auth call key");
+
+    MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};
+
+    OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
+                                     /*RegMaskStartIdx=*/5);
   } else {
     assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
     OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ef34a8539ffa7..275d5eddd317b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -349,7 +349,7 @@ extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
   // If the constant discriminator (either the blend RHS, or the entire
   // discriminator value) isn't a 16-bit constant, bail out, and let the
   // discriminator be computed separately.
-  auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
+  const auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
   if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
     return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c0cda5315b8d3..c6e76716ff6b4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -662,6 +662,30 @@ def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64",
                                       [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                        SDNPVariadic]>;
 
+def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL",
+                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
+                                                   SDTCisVT<1, i32>,
+                                                   SDTCisVT<2, i64>,
+                                                   SDTCisVT<3, i64>]>,
+                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                              SDNPVariadic]>;
+
+def AArch64authtcret: SDNode<"AArch64ISD::AUTH_TC_RETURN",
+                             SDTypeProfile<0, 5, [SDTCisPtrTy<0>,
+                                                  SDTCisVT<2, i32>,
+                                                  SDTCisVT<3, i64>,
+                                                  SDTCisVT<4, i64>]>,
+                             [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
+
+def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER",
+                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
+                                                       SDTCisPtrTy<1>,
+                                                       SDTCisVT<2, i32>,
+                                                       SDTCisVT<3, i64>,
+                                                       SDTCisVT<4, i64>]>,
+                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                                  SDNPVariadic]>;
+
 def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                 [SDNPHasChain]>;
 def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
@@ -1564,30 +1588,6 @@ let Predicates = [HasComplxNum, HasNEON] in {
                          (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
 }
 
-def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL",
-                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
-                                                   SDTCisVT<1, i32>,
-                                                   SDTCisVT<2, i64>,
-                                                   SDTCisVT<3, i64>]>,
-                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
-                              SDNPVariadic]>;
-
-def AArch64authtcret: SDNode<"AArch64ISD::AUTH_TC_RETURN",
-                             SDTypeProfile<0, 5, [SDTCisPtrTy<0>,
-                                                  SDTCisVT<2, i32>,
-                                                  SDTCisVT<3, i64>,
-                                                  SDTCisVT<4, i64>]>,
-                             [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
-
-def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER",
-                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
-                                                       SDTCisPtrTy<1>,
-                                                       SDTCisVT<2, i32>,
-                                                       SDTCisVT<3, i64>,
-                                                       SDTCisVT<4, i64>]>,
-                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
-                                  SDNPVariadic]>;
-
 // v8.3a Pointer Authentication
 // These instructions inhabit part of the hint space and so can be used for
 // armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
@@ -1716,9 +1716,12 @@ let Predicates = [HasPAuth] in {
     def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
   }
 
-  // BLRA pseudo, generalized version of BLRAA/BLRAB/Z.
-  // This directly manipulates x16/x17, which are the only registers the OS
-  // guarantees are safe to use for sensitive operations.
+  // BLRA pseudo, a generalized version of BLRAA/BLRAB/Z.
+  // This directly manipulates x16/x17 to materialize the discriminator.
+  // x16/x17 are generally used as the safe registers for sensitive ptrauth
+  // operations (such as raw address manipulation or discriminator
+  // materialization here), in part because they're handled in a safer way by
+  // the kernel, notably on Darwin.
   def BLRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
                                  GPR64noip:$AddrDisc),
                     [(AArch64authcall GPR64noip:$Rn, timm:$Key, timm:$Disc,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 0bac9d74ed5a8..d42b78ea55b53 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1015,7 +1015,7 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
 
 static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                               bool IsTailCall,
-                              std::optional<CallLowering::PointerAuthInfo> &PAI,
+                              std::optional<CallLowering::PtrAuthInfo> &PAI,
                               MachineRegisterInfo &MRI) {
   const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>();
 
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-call.ll b/llvm/test/CodeGen/AArch64/ptrauth-call.ll
index 585ac0e75e6ac..b1249891b04b4 100644
--- a/llvm/test/CodeGen/AArch64/ptrauth-call.ll
+++ b/llvm/test/CodeGen/AArch64/ptrauth-call.ll
@@ -1,192 +1,269 @@
-; RUN: llc -mtriple arm64e-apple-darwin                                   -asm-verbose=false -o - %s | FileCheck %s
-; RUN: llc -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -asm-verbose=false -o - %s | FileCheck %s
-; RUN: llc -mtriple arm64e-apple-darwin -emit-call-site-info              -asm-verbose=false -o - %s | FileCheck %s
-
-; CHECK-LABEL: _test_call_ia_0:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  blraaz x0
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
+; RUN: llc -mtriple arm64e-apple-darwin   -o - %s -asm-verbose=0 \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,DARWIN
+
+; RUN: llc -mtriple aarch64 -mattr=+pauth -o - %s -asm-verbose=0 \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,ELF
+
+; RUN: llc -mtriple arm64e-apple-darwin   -o - %s -asm-verbose=0 \
+; RUN:   -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:  | FileCheck %s --check-prefixes=CHECK,DARWIN
+
+; RUN: llc -mtriple aarch64 -mattr=+pauth -o - %s -asm-verbose=0 \
+; RUN:   -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:  | FileCheck %s --check-prefixes=CHECK,ELF
+
+
 define i32 @test_call_ia_0(i32 ()* %arg0) #0 {
+; DARWIN-LABEL: test_call_ia_0:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    blraaz x0
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ia_0:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    blraaz x0
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = call i32 %arg0() [ "ptrauth"(i32 0, i64 0) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_call_ib_0:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  blrabz x0
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
 define i32 @test_call_ib_0(i32 ()* %arg0) #0 {
+; DARWIN-LABEL: test_call_ib_0:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    blrabz x0
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ib_0:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    blrabz x0
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = call i32 %arg0() [ "ptrauth"(i32 1, i64 0) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_tailcall_ia_0:
-; CHECK-NEXT:  braaz x0
 define i32 @test_tailcall_ia_0(i32 ()* %arg0) #0 {
+; CHECK-LABEL: test_tailcall_ia_0:
+; CHECK:    braaz x0
   %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 0) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_tailcall_ib_0:
-; CHECK-NEXT:  brabz x0
 define i32 @test_tailcall_ib_0(i32 ()* %arg0) #0 {
+; CHECK-LABEL: test_tailcall_ib_0:
+; CHECK:    brabz x0
   %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 0) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_call_ia_imm:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  mov x17, #42
-; CHECK-NEXT:  blraa x0, x17
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
 define i32 @test_call_ia_imm(i32 ()* %arg0) #0 {
+; DARWIN-LABEL: test_call_ia_imm:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    mov x17, #42
+; DARWIN-NEXT:    blraa x0, x17
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ia_imm:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    mov x17, #42
+; ELF-NEXT:    blraa x0, x17
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = call i32 %arg0() [ "ptrauth"(i32 0, i64 42) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_call_ib_imm:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  mov x17, #42
-; CHECK-NEXT:  blrab x0, x17
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
 define i32 @test_call_ib_imm(i32 ()* %arg0) #0 {
+; DARWIN-LABEL: test_call_ib_imm:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    mov x17, #42
+; DARWIN-NEXT:    blrab x0, x17
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ib_imm:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    mov x17, #42
+; ELF-NEXT:    blrab x0, x17
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = call i32 %arg0() [ "ptrauth"(i32 1, i64 42) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_tailcall_ia_imm:
-; CHECK-NEXT:  mov x16, #42
-; CHECK-NEXT:  braa x0, x16
 define i32 @test_tailcall_ia_imm(i32 ()* %arg0) #0 {
+; CHECK-LABEL: test_tailcall_ia_imm:
+; CHECK-NEXT:    mov x16, #42
+; CHECK-NEXT:    braa x0, x16
   %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 42) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_tailcall_ib_imm:
-; CHECK-NEXT:  mov x16, #42
-; CHECK-NEXT:  brab x0, x16
 define i32 @test_tailcall_ib_imm(i32 ()* %arg0) #0 {
+; CHECK-LABEL: test_tailcall_ib_imm:
+; CHECK-NEXT:    mov x16, #42
+; CHECK-NEXT:    brab x0, x16
   %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 42) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_call_ia_var:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  ldr x8, [x1]
-; CHECK-NEXT:  blraa x0, x8
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
 define i32 @test_call_ia_var(i32 ()* %arg0, i64* %arg1) #0 {
+; DARWIN-LABEL: test_call_ia_var:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    ldr x8, [x1]
+; DARWIN-NEXT:    blraa x0, x8
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ia_var:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    ldr x8, [x1]
+; ELF-NEXT:    blraa x0, x8
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = load i64, i64* %arg1
   %tmp1 = call i32 %arg0() [ "ptrauth"(i32 0, i64 %tmp0) ]
   ret i32 %tmp1
 }
 
-; CHECK-LABEL: _test_call_ib_var:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  ldr x8, [x1]
-; CHECK-NEXT:  blrab x0, x8
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
 define i32 @test_call_ib_var(i32 ()* %arg0, i64* %arg1) #0 {
+; DARWIN-LABEL: test_call_ib_var:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    ldr x8, [x1]
+; DARWIN-NEXT:    blrab x0, x8
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ib_var:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    ldr x8, [x1]
+; ELF-NEXT:    blrab x0, x8
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = load i64, i64* %arg1
   %tmp1 = call i32 %arg0() [ "ptrauth"(i32 1, i64 %tmp0) ]
   ret i32 %tmp1
 }
 
-; CHECK-LABEL: _test_tailcall_ia_var:
-; CHECK-NEXT:  ldr x1, [x1]
-; CHECK-NEXT:  braa x0, x1
 define i32 @test_tailcall_ia_var(i32 ()* %arg0, i64* %arg1) #0 {
+; CHECK-LABEL: test_tailcall_ia_var:
+; CHECK:    ldr x1, [x1]
+; CHECK:    braa x0, x1
   %tmp0 = load i64, i64* %arg1
   %tmp1 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 %tmp0) ]
   ret i32 %tmp1
 }
 
-; CHECK-LABEL: _test_tailcall_ib_var:
-; CHECK-NEXT:  ldr x1, [x1]
-; CHECK-NEXT:  brab x0, x1
 define i32 @test_tailcall_ib_var(i32 ()* %arg0, i64* %arg1) #0 {
+; CHECK-LABEL: test_tailcall_ib_var:
+; CHECK:    ldr x1, [x1]
+; CHECK:    brab x0, x1
   %tmp0 = load i64, i64* %arg1
   %tmp1 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 %tmp0) ]
   ret i32 %tmp1
 }
 
-; CHECK-LABEL: _test_call_ia_arg:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  blraa x0, x1
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
 define i32 @test_call_ia_arg(i32 ()* %arg0, i64 %arg1) #0 {
+; DARWIN-LABEL: test_call_ia_arg:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    blraa x0, x1
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ia_arg:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    blraa x0, x1
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = call i32 %arg0() [ "ptrauth"(i32 0, i64 %arg1) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_call_ib_arg:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  blrab x0, x1
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
 define i32 @test_call_ib_arg(i32 ()* %arg0, i64 %arg1) #0 {
+; DARWIN-LABEL: test_call_ib_arg:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    blrab x0, x1
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ib_arg:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    blrab x0, x1
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = call i32 %arg0() [ "ptrauth"(i32 1, i64 %arg1) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_tailcall_ia_arg:
-; CHECK-NEXT:  braa x0, x1
 define i32 @test_tailcall_ia_arg(i32 ()* %arg0, i64 %arg1) #0 {
+; CHECK-LABEL: test_tailcall_ia_arg:
+; CHECK:    braa x0, x1
   %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 0, i64 %arg1) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_tailcall_ib_arg:
-; CHECK-NEXT:  brab x0, x1
 define i32 @test_tailcall_ib_arg(i32 ()* %arg0, i64 %arg1) #0 {
+; CHECK-LABEL: test_tailcall_ib_arg:
+; CHECK:    brab x0, x1
   %tmp0 = tail call i32 %arg0() [ "ptrauth"(i32 1, i64 %arg1) ]
   ret i32 %tmp0
 }
 
-; CHECK-LABEL: _test_call_ia_arg_ind:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  ldr x8, [x0]
-; CHECK-NEXT:  blraa x8, x1
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
 define i32 @test_call_ia_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+; DARWIN-LABEL: test_call_ia_arg_ind:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    ldr x8, [x0]
+; DARWIN-NEXT:    blraa x8, x1
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ia_arg_ind:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    ldr x8, [x0]
+; ELF-NEXT:    blraa x8, x1
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = load i32 ()*, i32 ()** %arg0
   %tmp1 = call i32 %tmp0() [ "ptrauth"(i32 0, i64 %arg1) ]
   ret i32 %tmp1
 }
 
-; CHECK-LABEL: _test_call_ib_arg_ind:
-; CHECK-NEXT:  stp x29, x30, [sp, #-16]!
-; CHECK-NEXT:  ldr x8, [x0]
-; CHECK-NEXT:  blrab x8, x1
-; CHECK-NEXT:  ldp x29, x30, [sp], #16
-; CHECK-NEXT:  ret
 define i32 @test_call_ib_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+; DARWIN-LABEL: test_call_ib_arg_ind:
+; DARWIN-NEXT:    stp x29, x30, [sp, #-16]!
+; DARWIN-NEXT:    ldr x8, [x0]
+; DARWIN-NEXT:    blrab x8, x1
+; DARWIN-NEXT:    ldp x29, x30, [sp], #16
+; DARWIN-NEXT:    ret
+;
+; ELF-LABEL: test_call_ib_arg_ind:
+; ELF-NEXT:    str x30, [sp, #-16]!
+; ELF-NEXT:    ldr x8, [x0]
+; ELF-NEXT:    blrab x8, x1
+; ELF-NEXT:    ldr x30, [sp], #16
+; ELF-NEXT:    ret
   %tmp0 = load i32 ()*, i32 ()** %arg0
   %tmp1 = call i32 %tmp0() [ "ptrauth"(i32 1, i64 %arg1) ]
   ret i32 %tmp1
 }
 
-; CHECK-LABEL: _test_tailcall_ia_arg_ind:
-; CHECK-NEXT:  ldr x0, [x0]
-; CHECK-NEXT:  braa x0, x1
 define i32 @test_tailcall_ia_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+; CHECK-LABEL: test_tailcall_ia_arg_ind:
+; CHECK:    ldr x0, [x0]
+; CHECK:    braa x0, x1
   %tmp0 = load i32 ()*, i32 ()** %arg0
   %tmp1 = tail call i32 %tmp0() [ "ptrauth"(i32 0, i64 %arg1) ]
   ret i32 %tmp1
 }
 
-; CHECK-LABEL: _test_tailcall_ib_arg_ind:
-; CHECK-NEXT:  ldr x0, [x0]
-; CHECK-NEXT:  brab x0, x1
 define i32 @test_tailcall_ib_arg_ind(i32 ()** %arg0, i64 %arg1) #0 {
+; CHECK-LABEL: test_tailcall_ib_arg_ind:
+; CHECK:    ldr x0, [x0]
+; CHECK:    brab x0, x1
   %tmp0 = load i32 ()*, i32 ()** %arg0
   %tmp1 = tail call i32 %tmp0() [ "ptrauth"(i32 1, i64 %arg1) ]
   ret i32 %tmp1

>From 5f2ab7435994011d4e52c806a5d80264eda2f785 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed at bougacha.org>
Date: Thu, 30 May 2024 15:13:27 -0700
Subject: [PATCH 5/5] Minor refinements.

- assert ptrauth tail-calls aren't used with PAuth_LR
- more const nits
---
 .../Target/AArch64/AArch64ExpandPseudoInsts.cpp    | 12 ++++++------
 .../Target/AArch64/GISel/AArch64CallLowering.cpp   | 14 +++++++++-----
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 9dc3fe661df8e..aaeb1fcbff834 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -791,7 +791,7 @@ static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
   // operands of the branch as it expects to be B <target> which is only one
   // operand. Instead they are implicit operands used by the branch.
   while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
-    MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
+    const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
     assert(MOP.isReg() && "can only add register operands");
     Call->addOperand(MachineOperand::CreateReg(
         MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
@@ -833,11 +833,11 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
   MachineInstr *OriginalCall = nullptr;
 
   if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
-    // Pointer auth call.
-    MachineOperand &CallTarget = MI.getOperand(1);
-    MachineOperand &Key = MI.getOperand(2);
-    MachineOperand &IntDisc = MI.getOperand(3);
-    MachineOperand &AddrDisc = MI.getOperand(4);
+    // ptrauth call.
+    const MachineOperand &CallTarget = MI.getOperand(1);
+    const MachineOperand &Key = MI.getOperand(2);
+    const MachineOperand &IntDisc = MI.getOperand(3);
+    const MachineOperand &AddrDisc = MI.getOperand(4);
 
     assert((Key.getImm() == AArch64PACKey::IA ||
             Key.getImm() == AArch64PACKey::IB) &&
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index d42b78ea55b53..7068d25ed4aaa 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1032,18 +1032,22 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
   if (!IsIndirect)
     return AArch64::TCRETURNdi;
 
-  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
-  // x16 or x17.
+  // When BTI or PAuthLR are enabled, there are restrictions on using x16 and
+  // x17 to hold the function pointer.
   if (FuncInfo->branchTargetEnforcement()) {
+    if (FuncInfo->branchProtectionPAuthLR()) {
+      assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
+      return AArch64::TCRETURNrix17;
+    }
     if (PAI)
       return AArch64::AUTH_TCRETURN_BTI;
-    if (FuncInfo->branchProtectionPAuthLR())
-      return AArch64::TCRETURNrix17;
     return AArch64::TCRETURNrix16x17;
   }
 
-  if (FuncInfo->branchProtectionPAuthLR())
+  if (FuncInfo->branchProtectionPAuthLR()) {
+    assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
     return AArch64::TCRETURNrinotx16;
+  }
 
   if (PAI)
     return AArch64::AUTH_TCRETURN;



More information about the llvm-branch-commits mailing list