[libunwind] [llvm] [AArch64][Libunwind] Add Support for FEAT_PAuthLR DWARF Instruction (PR #112171)

Jack Styles via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 16 01:08:44 PDT 2024


https://github.com/Stylie777 updated https://github.com/llvm/llvm-project/pull/112171

>From a702473aacc6a9c47eb80b204ee3200c2ff2eb26 Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Thu, 3 Oct 2024 14:20:10 +0100
Subject: [PATCH 1/6] [PAuthLR] Add support for FEAT_PAuth_LR to libunwind

This introduces support for unwinding programs where return addresses
have been signed using FEAT_PAuth_LR, where the value of PC is used as
a diversifier (-mbranch-protection=pac-ret+pc).

A new vendor specific call frame instruction is added,
named `DW_CFA_AARCH64_negate_ra_state_with_pc`, to instruct the unwinder
to capture the value of PC at the point of signing and update bit 1 of
the existing `RA_SIGN_STATE` pseudo-register to flag the need to use it
for authentication.

See https://github.com/ARM-software/abi-aa/pull/245 for the ABI change.

Authored-by: pratlucas <lucas.prates at arm.com>
---
 libunwind/src/DwarfInstructions.hpp | 54 ++++++++++++++++++++++-------
 libunwind/src/DwarfParser.hpp       | 20 +++++++++++
 libunwind/src/dwarf2.h              |  3 +-
 3 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/libunwind/src/DwarfInstructions.hpp b/libunwind/src/DwarfInstructions.hpp
index bd9ece60ee5881..e7c467de80adb6 100644
--- a/libunwind/src/DwarfInstructions.hpp
+++ b/libunwind/src/DwarfInstructions.hpp
@@ -74,8 +74,10 @@ class DwarfInstructions {
     __builtin_unreachable();
   }
 #if defined(_LIBUNWIND_TARGET_AARCH64)
-  static bool getRA_SIGN_STATE(A &addressSpace, R registers, pint_t cfa,
-                               PrologInfo &prolog);
+  static bool isReturnAddressSigned(A &addressSpace, R registers, pint_t cfa,
+                                    PrologInfo &prolog);
+  static bool isReturnAddressSignedWithPC(A &addressSpace, R registers,
+                                          pint_t cfa, PrologInfo &prolog);
 #endif
 };
 
@@ -173,8 +175,9 @@ v128 DwarfInstructions<A, R>::getSavedVectorRegister(
 }
 #if defined(_LIBUNWIND_TARGET_AARCH64)
 template <typename A, typename R>
-bool DwarfInstructions<A, R>::getRA_SIGN_STATE(A &addressSpace, R registers,
-                                               pint_t cfa, PrologInfo &prolog) {
+bool DwarfInstructions<A, R>::isReturnAddressSigned(A &addressSpace,
+                                                    R registers, pint_t cfa,
+                                                    PrologInfo &prolog) {
   pint_t raSignState;
   auto regloc = prolog.savedRegisters[UNW_AARCH64_RA_SIGN_STATE];
   if (regloc.location == CFI_Parser<A>::kRegisterUnused)
@@ -185,6 +188,22 @@ bool DwarfInstructions<A, R>::getRA_SIGN_STATE(A &addressSpace, R registers,
   // Only bit[0] is meaningful.
   return raSignState & 0x01;
 }
+
+template <typename A, typename R>
+bool DwarfInstructions<A, R>::isReturnAddressSignedWithPC(A &addressSpace,
+                                                          R registers,
+                                                          pint_t cfa,
+                                                          PrologInfo &prolog) {
+  pint_t raSignState;
+  auto regloc = prolog.savedRegisters[UNW_AARCH64_RA_SIGN_STATE];
+  if (regloc.location == CFI_Parser<A>::kRegisterUnused)
+    raSignState = static_cast<pint_t>(regloc.value);
+  else
+    raSignState = getSavedRegister(addressSpace, registers, cfa, regloc);
+
+  // Only bit[1] is meaningful.
+  return raSignState & 0x02;
+}
 #endif
 
 template <typename A, typename R>
@@ -288,7 +307,7 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
       // restored. autia1716 is used instead of autia as autia1716 assembles
       // to a NOP on pre-v8.3a architectures.
       if ((R::getArch() == REGISTERS_ARM64) &&
-          getRA_SIGN_STATE(addressSpace, registers, cfa, prolog) &&
+          isReturnAddressSigned(addressSpace, registers, cfa, prolog) &&
           returnAddress != 0) {
 #if !defined(_LIBUNWIND_IS_NATIVE_ONLY)
         return UNW_ECROSSRASIGNING;
@@ -296,13 +315,24 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
         register unsigned long long x17 __asm("x17") = returnAddress;
         register unsigned long long x16 __asm("x16") = cfa;
 
-        // These are the autia1716/autib1716 instructions. The hint instructions
-        // are used here as gcc does not assemble autia1716/autib1716 for pre
-        // armv8.3a targets.
-        if (cieInfo.addressesSignedWithBKey)
-          asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716
-        else
-          asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716
+        // We use the hint versions of the authentication instructions below to
+        // ensure they're assembled by the compiler even for targets with no
+        // FEAT_PAuth/FEAT_PAuth_LR support.
+        if(isReturnAddressSignedWithPC(addressSpace, registers, cfa, prolog)) {
+          register unsigned long long x15 __asm("x15") = prolog.ptrAuthDiversifier;
+          if(cieInfo.addressesSignedWithBKey) {
+            asm("hint 0x27\n\t" // pacm
+                "hint 0xe" : "+r"(x17) : "r"(x16),  "r"(x15)); // autib1716
+          } else {
+            asm("hint 0x27\n\t" // pacm
+                "hint 0xc" : "+r"(x17) : "r"(x16), "r"(x15)); // autia1716
+          }
+        } else {
+          if (cieInfo.addressesSignedWithBKey)
+            asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716
+          else
+            asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716
+        }
         returnAddress = x17;
 #endif
       }
diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp
index 0682942ce13799..b104d773ed4440 100644
--- a/libunwind/src/DwarfParser.hpp
+++ b/libunwind/src/DwarfParser.hpp
@@ -91,6 +91,9 @@ class CFI_Parser {
     int64_t           cfaExpression;      // CFA = expression
     uint32_t          spExtraArgSize;
     RegisterLocation  savedRegisters[kMaxRegisterNumber + 1];
+    #if defined(_LIBUNWIND_TARGET_AARCH64)
+    pint_t            ptrAuthDiversifier;
+    #endif
     enum class InitializeTime { kLazy, kNormal };
 
     // When saving registers, this data structure is lazily initialized.
@@ -799,6 +802,23 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
         }
         break;
 
+#if defined(_LIBUNWIND_TARGET_AARCH64)
+      case DW_CFA_AARCH64_negate_ra_state_with_pc: {
+        int64_t value =
+            results->savedRegisters[UNW_AARCH64_RA_SIGN_STATE].value ^ 0x3;
+        results->setRegisterValue(UNW_AARCH64_RA_SIGN_STATE, value,
+                                  initialState);
+        // The PC calculation assumes this CFI instruction is placed before the
+        // signing instruction, but it is actually placed after it. The location
+        // computed from codeOffset is therefore one instruction later than the
+        // signing instruction, so subtract 4 bytes to compensate.
+        results->ptrAuthDiversifier = fdeInfo.pcStart + codeOffset - 0x4;
+        _LIBUNWIND_TRACE_DWARF("DW_CFA_AARCH64_negate_ra_state_with_pc(pc=0x%" PRIx64 ")\n",
+                               static_cast<uint64_t>(results->ptrAuthDiversifier));
+      }
+      break;
+#endif
+
 #else
         (void)arch;
 #endif
diff --git a/libunwind/src/dwarf2.h b/libunwind/src/dwarf2.h
index 174277d5a79508..2ad3d3c464e80d 100644
--- a/libunwind/src/dwarf2.h
+++ b/libunwind/src/dwarf2.h
@@ -51,7 +51,8 @@ enum {
   DW_CFA_GNU_negative_offset_extended = 0x2F,
 
   // AARCH64 extensions
-  DW_CFA_AARCH64_negate_ra_state      = 0x2D
+  DW_CFA_AARCH64_negate_ra_state_with_pc = 0x2C,
+  DW_CFA_AARCH64_negate_ra_state         = 0x2D
 };
 
 

>From cd8b71cde66b8a65ef7190cd9b2d4a30f98a334f Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Thu, 3 Oct 2024 14:31:55 +0100
Subject: [PATCH 2/6] [PAuthLR] Add support for FEAT_PAuth_LR's DWARF frame
 instruction

This introduces compiler and dwarfdump support for emitting
and parsing the new `DW_CFA_AARCH64_negate_ra_state_with_pc`
DWARF instruction for FEAT_PAuth_LR.

This does mean that, when using FEAT_PAuthLR, the improvements
introduced in #96337 cannot be utilised. `.cfi_negate_ra_state_with_pc`
must be emitted directly after the signing instruction; bundling it
with other CFI calls leads to faults when running a program. When
FEAT_PAuthLR is not used, there is no change to how the CFI
instructions are generated.

See https://github.com/ARM-software/abi-aa/pull/245 for the ABI change
that incorporates FEAT_PAuthLR.

Authored-by: pratlucas <lucas.prates at arm.com>
Co-authored-by: vhscampos <victor.campos at arm.com>
Co-authored-by: Stylie777 <jack.styles at arm.com>
---
 llvm/include/llvm/BinaryFormat/Dwarf.def      |   1 +
 llvm/include/llvm/MC/MCDwarf.h                |   8 +
 llvm/include/llvm/MC/MCStreamer.h             |   1 +
 .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp    |   3 +
 llvm/lib/CodeGen/CFIInstrInserter.cpp         |   1 +
 llvm/lib/CodeGen/MIRParser/MILexer.cpp        |   2 +
 llvm/lib/CodeGen/MIRParser/MILexer.h          |   1 +
 llvm/lib/CodeGen/MIRParser/MIParser.cpp       |   5 +
 llvm/lib/CodeGen/MachineOperand.cpp           |   4 +
 llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp  |  24 +++
 llvm/lib/MC/MCAsmStreamer.cpp                 |   7 +
 llvm/lib/MC/MCDwarf.cpp                       |   4 +
 llvm/lib/MC/MCStreamer.cpp                    |  10 ++
 .../Target/AArch64/AArch64FrameLowering.cpp   |   5 +-
 .../lib/Target/AArch64/AArch64PointerAuth.cpp |  90 ++++++----
 .../AArch64/AsmParser/AArch64AsmParser.cpp    |  10 ++
 ...sign-return-address-cfi-negate-ra-state.ll |   3 +-
 .../AArch64/sign-return-address-pauth-lr.ll   | 162 ++++++++++++------
 .../MIR/AArch64/return-address-signing.mir    |  23 +++
 .../MC/AArch64/directives-case_insensitive.s  |   2 +
 .../test/MC/AArch64/negate_ra_state_with_pc.s |   7 +
 .../DebugInfo/DWARF/DWARFDebugFrameTest.cpp   |   1 +
 22 files changed, 283 insertions(+), 91 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/negate_ra_state_with_pc.s

diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index d55947fc5103ac..9336f2a454ae47 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -1238,6 +1238,7 @@ HANDLE_DW_CFA(0x16, val_expression)
 // Vendor extensions:
 HANDLE_DW_CFA_PRED(0x1d, MIPS_advance_loc8, SELECT_MIPS64)
 HANDLE_DW_CFA_PRED(0x2d, GNU_window_save, SELECT_SPARC)
+HANDLE_DW_CFA_PRED(0x2c, AARCH64_negate_ra_state_with_pc, SELECT_AARCH64)
 HANDLE_DW_CFA_PRED(0x2d, AARCH64_negate_ra_state, SELECT_AARCH64)
 HANDLE_DW_CFA_PRED(0x2e, GNU_args_size, SELECT_X86)
 // Heterogeneous Debugging Extension defined at
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index bea79545d1ab96..2ceea906ea57a7 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -515,6 +515,7 @@ class MCCFIInstruction {
     OpRegister,
     OpWindowSave,
     OpNegateRAState,
+    OpNegateRAStateWithPC,
     OpGnuArgsSize,
     OpLabel,
   };
@@ -642,6 +643,13 @@ class MCCFIInstruction {
     return MCCFIInstruction(OpNegateRAState, L, 0, INT64_C(0), Loc);
   }
 
+  /// .cfi_negate_ra_state_with_pc AArch64 negate RA state with PC.
+  static MCCFIInstruction createNegateRAStateWithPC(MCSymbol *L,
+                                                    SMLoc Loc = {}) {
+    return MCCFIInstruction(OpNegateRAStateWithPC, L, 0, INT64_C(0), Loc);
+  }
+
+
   /// .cfi_restore says that the rule for Register is now the same as it
   /// was at the beginning of the function, after all initial instructions added
   /// by .cfi_startproc were executed.
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 707aecc5dc578e..a376ba810ba515 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -1022,6 +1022,7 @@ class MCStreamer {
                                SMLoc Loc = {});
   virtual void emitCFIWindowSave(SMLoc Loc = {});
   virtual void emitCFINegateRAState(SMLoc Loc = {});
+  virtual void emitCFINegateRAStateWithPC(SMLoc Loc = {});
   virtual void emitCFILabelDirective(SMLoc Loc, StringRef Name);
 
   virtual void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc());
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 21d0d070c247f4..daad82d26da652 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -236,6 +236,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
   case MCCFIInstruction::OpNegateRAState:
     OutStreamer->emitCFINegateRAState(Loc);
     break;
+  case MCCFIInstruction::OpNegateRAStateWithPC:
+    OutStreamer->emitCFINegateRAStateWithPC(Loc);
+    break;
   case MCCFIInstruction::OpSameValue:
     OutStreamer->emitCFISameValue(Inst.getRegister(), Loc);
     break;
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index f5bedc7b8ecdfc..4217ec6a1cca8a 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -260,6 +260,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
       case MCCFIInstruction::OpEscape:
       case MCCFIInstruction::OpWindowSave:
       case MCCFIInstruction::OpNegateRAState:
+      case MCCFIInstruction::OpNegateRAStateWithPC:
       case MCCFIInstruction::OpGnuArgsSize:
       case MCCFIInstruction::OpLabel:
         break;
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 0809f88fde56b1..5a3806ce57335a 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -238,6 +238,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
       .Case("window_save", MIToken::kw_cfi_window_save)
       .Case("negate_ra_sign_state",
             MIToken::kw_cfi_aarch64_negate_ra_sign_state)
+      .Case("negate_ra_sign_state_with_pc",
+            MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc)
       .Case("blockaddress", MIToken::kw_blockaddress)
       .Case("intrinsic", MIToken::kw_intrinsic)
       .Case("target-index", MIToken::kw_target_index)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index 22547483a8a86b..3931da3eaae1d3 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -96,6 +96,7 @@ struct MIToken {
     kw_cfi_undefined,
     kw_cfi_window_save,
     kw_cfi_aarch64_negate_ra_sign_state,
+    kw_cfi_aarch64_negate_ra_sign_state_with_pc,
     kw_blockaddress,
     kw_intrinsic,
     kw_target_index,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 7aaa0f409d5ef9..45847b5830da65 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -2576,6 +2576,10 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
   case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
     CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
     break;
+  case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc:
+    CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createNegateRAStateWithPC(nullptr));
+    break;
   case MIToken::kw_cfi_escape: {
     std::string Values;
     if (parseCFIEscapeValues(Values))
@@ -2931,6 +2935,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
   case MIToken::kw_cfi_undefined:
   case MIToken::kw_cfi_window_save:
   case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
+  case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc:
     return parseCFIOperand(Dest);
   case MIToken::kw_blockaddress:
     return parseBlockAddressOperand(Dest);
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 89d32c3f005e00..cd94213da79893 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -768,6 +768,11 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
     if (MCSymbol *Label = CFI.getLabel())
       MachineOperand::printSymbol(OS, *Label);
     break;
+  case MCCFIInstruction::OpNegateRAStateWithPC:
+    OS << "negate_ra_sign_state_with_pc ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    break;
   default:
     // TODO: Print the other CFI Operations.
     OS << "<unserializable cfi directive>";
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index aff26824dda104..38e264f233e39b 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -288,6 +288,7 @@ Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset,
     case DW_CFA_remember_state:
     case DW_CFA_restore_state:
     case DW_CFA_GNU_window_save:
+    case DW_CFA_AARCH64_negate_ra_state_with_pc:
       // No operands
       addInstruction(Opcode);
       break;
@@ -666,6 +667,28 @@ Error UnwindTable::parseRows(const CFIProgram &CFIP, UnwindRow &Row,
       }
       break;
 
+    case dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc: {
+        constexpr uint32_t AArch64DWARFPAuthRaState = 34;
+        auto LRLoc = Row.getRegisterLocations().getRegisterLocation(
+            AArch64DWARFPAuthRaState);
+        if (LRLoc) {
+          if (LRLoc->getLocation() == UnwindLocation::Constant) {
+            // Toggle the constant value of bits[1:0] from 0 to 1 or 1 to 0.
+            LRLoc->setConstant(LRLoc->getConstant() ^ 0x3);
+          } else {
+            return createStringError(
+                errc::invalid_argument,
+                "%s encountered when existing rule for this register is not "
+                "a constant",
+                CFIP.callFrameString(Inst.Opcode).str().c_str());
+          }
+        } else {
+          Row.getRegisterLocations().setRegisterLocation(
+              AArch64DWARFPAuthRaState, UnwindLocation::createIsConstant(0x3));
+        }
+        break;
+      }
+
     case dwarf::DW_CFA_undefined: {
       llvm::Expected<uint64_t> RegNum = Inst.getOperandAsUnsigned(CFIP, 0);
       if (!RegNum)
@@ -847,6 +870,7 @@ CFIProgram::getOperandTypes() {
   DECLARE_OP0(DW_CFA_remember_state);
   DECLARE_OP0(DW_CFA_restore_state);
   DECLARE_OP0(DW_CFA_GNU_window_save);
+  DECLARE_OP0(DW_CFA_AARCH64_negate_ra_state_with_pc);
   DECLARE_OP1(DW_CFA_GNU_args_size, OT_Offset);
   DECLARE_OP0(DW_CFA_nop);
 
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 31b519a3e5c56a..b9ad0b4eac9c7b 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -373,6 +373,7 @@ class MCAsmStreamer final : public MCStreamer {
                        SMLoc Loc) override;
   void emitCFIWindowSave(SMLoc Loc) override;
   void emitCFINegateRAState(SMLoc Loc) override;
+  void emitCFINegateRAStateWithPC(SMLoc Loc) override;
   void emitCFIReturnColumn(int64_t Register) override;
   void emitCFILabelDirective(SMLoc Loc, StringRef Name) override;
 
@@ -2145,6 +2146,12 @@ void MCAsmStreamer::emitCFINegateRAState(SMLoc Loc) {
   EmitEOL();
 }
 
+void MCAsmStreamer::emitCFINegateRAStateWithPC(SMLoc Loc) {
+  MCStreamer::emitCFINegateRAStateWithPC(Loc);
+  OS << "\t.cfi_negate_ra_state_with_pc";
+  EmitEOL();
+}
+
 void MCAsmStreamer::emitCFIReturnColumn(int64_t Register) {
   MCStreamer::emitCFIReturnColumn(Register);
   OS << "\t.cfi_return_column ";
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 8ff097f29aebd1..e058358fb8ad4b 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -1381,6 +1381,10 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) {
     Streamer.emitInt8(dwarf::DW_CFA_AARCH64_negate_ra_state);
     return;
 
+  case MCCFIInstruction::OpNegateRAStateWithPC:
+    Streamer.emitInt8(dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc);
+    return;
+
   case MCCFIInstruction::OpUndefined: {
     unsigned Reg = Instr.getRegister();
     Streamer.emitInt8(dwarf::DW_CFA_undefined);
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index 13b162768578c5..5474db1315f141 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -688,6 +688,16 @@ void MCStreamer::emitCFINegateRAState(SMLoc Loc) {
   CurFrame->Instructions.push_back(Instruction);
 }
 
+void MCStreamer::emitCFINegateRAStateWithPC(SMLoc Loc) {
+  MCSymbol *Label = emitCFILabel();
+  MCCFIInstruction Instruction =
+      MCCFIInstruction::createNegateRAStateWithPC(Label, Loc);
+  MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
+  if (!CurFrame)
+    return;
+  CurFrame->Instructions.push_back(Instruction);
+}
+
 void MCStreamer::emitCFIReturnColumn(int64_t Register) {
   MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
   if (!CurFrame)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 1b8eac7fac21f7..c12540809e727d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -702,7 +702,10 @@ void AArch64FrameLowering::resetCFIToInitialState(
 
   // Flip the RA sign state.
   if (MFI.shouldSignReturnAddress(MF)) {
-    CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+    auto CFIInst = MFI.branchProtectionPAuthLR()
+                       ? MCCFIInstruction::createNegateRAStateWithPC(nullptr)
+                       : MCCFIInstruction::createNegateRAState(nullptr);
+    CFIIndex = MF.addFrameInst(CFIInst);
     BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
   }
 
diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index 92ab4b5c3d251f..0879b5aec37aea 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -71,6 +71,18 @@ FunctionPass *llvm::createAArch64PointerAuthPass() {
 
 char AArch64PointerAuth::ID = 0;
 
+static void emitPACSymOffsetIntoX16(const TargetInstrInfo &TII,
+                                    MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I, DebugLoc DL,
+                                    MCSymbol *PACSym) {
+  BuildMI(MBB, I, DL, TII.get(AArch64::ADRP), AArch64::X16)
+      .addSym(PACSym, AArch64II::MO_PAGE);
+  BuildMI(MBB, I, DL, TII.get(AArch64::ADDXri), AArch64::X16)
+      .addReg(AArch64::X16)
+      .addSym(PACSym, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+      .addImm(0);
+}
+
 // Where PAuthLR support is not known at compile time, it is supported using
 // PACM. PACM is in the hint space so has no effect when PAuthLR is not
 // supported by the hardware, but will alter the behaviour of PACI*SP, AUTI*SP
@@ -81,12 +93,10 @@ static void BuildPACM(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB,
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   auto &MFnI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
 
-  // ADR X16,<address_of_PACIASP>
+  // Offset to PAC*SP using ADRP + ADD.
   if (PACSym) {
     assert(Flags == MachineInstr::FrameDestroy);
-    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADR))
-        .addReg(AArch64::X16, RegState::Define)
-        .addSym(PACSym);
+    emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
   }
 
   // Only emit PACM if -mbranch-protection has +pc and the target does not
@@ -95,12 +105,49 @@ static void BuildPACM(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB,
     BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM)).setMIFlag(Flags);
 }
 
+static void emitPACCFI(const AArch64Subtarget &Subtarget,
+                       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                       DebugLoc DL, MachineInstr::MIFlag Flags, bool EmitCFI) {
+  if (!EmitCFI)
+    return;
+
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  auto &MF = *MBB.getParent();
+  auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
+  bool EmitAsyncCFI = MFnI.needsAsyncDwarfUnwindInfo(MF);
+
+  auto CFIInst = MFnI.branchProtectionPAuthLR()
+                     ? MCCFIInstruction::createNegateRAStateWithPC(nullptr)
+                     : MCCFIInstruction::createNegateRAState(nullptr);
+
+  // With PAuthLR, the NegateRAStateWithPC CFI instruction cannot be bundled
+  // with the other CFI instructions in the prolog: it must directly follow the
+  // signing instruction so that the PC value is captured in case of an error
+  // in the instructions that follow.
+  if (!EmitAsyncCFI && !(MFnI.branchProtectionPAuthLR())) {
+    // Reduce the size of the generated call frame information for synchronous
+    // CFI by bundling the new CFI instruction with others in the prolog, so
+    // that no additional DW_CFA_advance_loc is needed.
+    for (auto I = MBBI; I != MBB.end(); ++I) {
+      if (I->getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
+          I->getFlag(MachineInstr::FrameSetup)) {
+        MBBI = I;
+        break;
+      }
+    }
+  }
+
+  unsigned CFIIndex = MF.addFrameInst(CFIInst);
+  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex)
+      .setMIFlags(Flags);
+}
+
 void AArch64PointerAuth::signLR(MachineFunction &MF,
                                 MachineBasicBlock::iterator MBBI) const {
   auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
   bool UseBKey = MFnI.shouldSignWithBKey();
   bool EmitCFI = MFnI.needsDwarfUnwindInfo(MF);
-  bool EmitAsyncCFI = MFnI.needsAsyncDwarfUnwindInfo(MF);
   bool NeedsWinCFI = MF.hasWinCFI();
 
   MachineBasicBlock &MBB = *MBBI->getParent();
@@ -128,6 +175,7 @@ void AArch64PointerAuth::signLR(MachineFunction &MF,
                                                : AArch64::PACIASPPC))
         .setMIFlag(MachineInstr::FrameSetup)
         ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel());
+    emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI);
   } else {
     BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup);
     BuildMI(MBB, MBBI, DL,
@@ -135,27 +183,10 @@ void AArch64PointerAuth::signLR(MachineFunction &MF,
                                                : AArch64::PACIASP))
         .setMIFlag(MachineInstr::FrameSetup)
         ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel());
+    emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI);
   }
 
-  if (EmitCFI) {
-    if (!EmitAsyncCFI) {
-      // Reduce the size of the generated call frame information for synchronous
-      // CFI by bundling the new CFI instruction with others in the prolog, so
-      // that no additional DW_CFA_advance_loc is needed.
-      for (auto I = MBBI; I != MBB.end(); ++I) {
-        if (I->getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
-            I->getFlag(MachineInstr::FrameSetup)) {
-          MBBI = I;
-          break;
-        }
-      }
-    }
-    unsigned CFIIndex =
-        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
-    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex)
-        .setMIFlags(MachineInstr::FrameSetup);
-  } else if (NeedsWinCFI) {
+  if (!EmitCFI && NeedsWinCFI) {
     BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
         .setMIFlag(MachineInstr::FrameSetup);
   }
@@ -190,6 +221,7 @@ void AArch64PointerAuth::authenticateLR(
       !MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
     if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
       assert(PACSym && "No PAC instruction to refer to");
+      emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
       BuildMI(MBB, TI, DL,
               TII->get(UseBKey ? AArch64::RETABSPPCi : AArch64::RETAASPPCi))
           .addSym(PACSym)
@@ -205,24 +237,20 @@ void AArch64PointerAuth::authenticateLR(
   } else {
     if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
       assert(PACSym && "No PAC instruction to refer to");
+      emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
       BuildMI(MBB, MBBI, DL,
               TII->get(UseBKey ? AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi))
           .addSym(PACSym)
           .setMIFlag(MachineInstr::FrameDestroy);
+      emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, EmitAsyncCFI);
     } else {
       BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, PACSym);
       BuildMI(MBB, MBBI, DL,
               TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
           .setMIFlag(MachineInstr::FrameDestroy);
+      emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, EmitAsyncCFI);
     }
 
-    if (EmitAsyncCFI) {
-      unsigned CFIIndex =
-          MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
-      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlags(MachineInstr::FrameDestroy);
-    }
     if (NeedsWinCFI) {
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
           .setMIFlag(MachineInstr::FrameDestroy);
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index df69c20b1359fc..cbd8bd1f20558c 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -195,6 +195,7 @@ class AArch64AsmParser : public MCTargetAsmParser {
   bool parseDirectiveReq(StringRef Name, SMLoc L);
   bool parseDirectiveUnreq(SMLoc L);
   bool parseDirectiveCFINegateRAState();
+  bool parseDirectiveCFINegateRAStateWithPC();
   bool parseDirectiveCFIBKeyFrame();
   bool parseDirectiveCFIMTETaggedFrame();
 
@@ -6821,6 +6822,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
     parseDirectiveInst(Loc);
   else if (IDVal == ".cfi_negate_ra_state")
     parseDirectiveCFINegateRAState();
+  else if (IDVal == ".cfi_negate_ra_state_with_pc")
+    parseDirectiveCFINegateRAStateWithPC();
   else if (IDVal == ".cfi_b_key_frame")
     parseDirectiveCFIBKeyFrame();
   else if (IDVal == ".cfi_mte_tagged_frame")
@@ -7271,6 +7274,13 @@ bool AArch64AsmParser::parseDirectiveCFINegateRAState() {
   return false;
 }
 
+bool AArch64AsmParser::parseDirectiveCFINegateRAStateWithPC() {
+  if (parseEOL())
+    return true;
+  getStreamer().emitCFINegateRAStateWithPC();
+  return false;
+}
+
 /// parseDirectiveCFIBKeyFrame
 /// ::= .cfi_b_key
 bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
index eb224bbbd601fb..fbf571eabd8015 100644
--- a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
+++ b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
@@ -229,7 +229,6 @@ attributes #0 = { "sign-return-address"="all" }
 ; CHECK-DUMP:   DW_CFA_restore_state:
 ; CHECK-DUMP:   DW_CFA_AARCH64_negate_ra_state:
 
-; CHECK-DUMP: CFA=WSP{{$}}
 ;; First DW_CFA_AARCH64_negate_ra_state:
 ; CHECK-DUMP: reg34=1
 ;; Second DW_CFA_AARCH64_negate_ra_state:
@@ -238,7 +237,7 @@ attributes #0 = { "sign-return-address"="all" }
 ; CHECK-DUMP: reg34=1
 ;; Third DW_CFA_AARCH64_negate_ra_state:
 ; CHECK-DUMP: reg34=0
-; CHECK-DUMP-NOT: reg34=
+; CHECK-DUMP-NOT: reg34=1
 
 ; baz_sync
 ; CHECK-DUMP-LABEL: FDE
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll b/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll
index 3d133e02106bc8..fa689d2b9d7fdd 100644
--- a/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll
+++ b/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll
@@ -62,8 +62,9 @@ define i32 @leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-addr
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp0
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp0
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp0
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -73,8 +74,9 @@ define i32 @leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-addr
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp0
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp0
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp0
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -82,7 +84,9 @@ define i32 @leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-addr
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp0:
 ; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp0
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp0
 ; PAUTHLR-NEXT:    retaasppc .Ltmp0
   ret i32 %x
 }
@@ -93,15 +97,16 @@ define i64 @leaf_clobbers_lr(i64 %x) "branch-protection-pauth-lr" "sign-return-a
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp1:
 ; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; COMPAT-NEXT:    .cfi_negate_ra_state
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    //APP
 ; COMPAT-NEXT:    mov x30, x0
 ; COMPAT-NEXT:    //NO_APP
 ; COMPAT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; COMPAT-NEXT:    adr x16, .Ltmp1
+; COMPAT-NEXT:    adrp x16, .Ltmp1
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp1
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -111,15 +116,16 @@ define i64 @leaf_clobbers_lr(i64 %x) "branch-protection-pauth-lr" "sign-return-a
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp1:
 ; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; V83A-NEXT:    .cfi_negate_ra_state
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    //APP
 ; V83A-NEXT:    mov x30, x0
 ; V83A-NEXT:    //NO_APP
 ; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; V83A-NEXT:    adr x16, .Ltmp1
+; V83A-NEXT:    adrp x16, .Ltmp1
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp1
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -127,14 +133,16 @@ define i64 @leaf_clobbers_lr(i64 %x) "branch-protection-pauth-lr" "sign-return-a
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp1:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    //APP
 ; PAUTHLR-NEXT:    mov x30, x0
 ; PAUTHLR-NEXT:    //NO_APP
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp1
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp1
 ; PAUTHLR-NEXT:    retaasppc .Ltmp1
   call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1
   ret i64 %x
@@ -148,13 +156,14 @@ define i32 @non_leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp2:
 ; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; COMPAT-NEXT:    .cfi_negate_ra_state
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    bl foo
 ; COMPAT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; COMPAT-NEXT:    adr x16, .Ltmp2
+; COMPAT-NEXT:    adrp x16, .Ltmp2
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp2
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -164,13 +173,14 @@ define i32 @non_leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp2:
 ; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; V83A-NEXT:    .cfi_negate_ra_state
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    bl foo
 ; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; V83A-NEXT:    adr x16, .Ltmp2
+; V83A-NEXT:    adrp x16, .Ltmp2
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp2
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -178,12 +188,14 @@ define i32 @non_leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp2:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    bl foo
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp2
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp2
 ; PAUTHLR-NEXT:    retaasppc .Ltmp2
   %call = call i32 @foo(i32 %x)
   ret i32 %call
@@ -195,13 +207,14 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "branch-protection-pauth-lr" "sign-re
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp3:
 ; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; COMPAT-NEXT:    .cfi_negate_ra_state
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    bl foo
 ; COMPAT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; COMPAT-NEXT:    adr x16, .Ltmp3
+; COMPAT-NEXT:    adrp x16, .Ltmp3
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp3
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -211,13 +224,14 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "branch-protection-pauth-lr" "sign-re
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp3:
 ; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; V83A-NEXT:    .cfi_negate_ra_state
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    bl foo
 ; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; V83A-NEXT:    adr x16, .Ltmp3
+; V83A-NEXT:    adrp x16, .Ltmp3
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp3
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -225,12 +239,14 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "branch-protection-pauth-lr" "sign-re
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp3:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    bl foo
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp3
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp3
 ; PAUTHLR-NEXT:    retaasppc .Ltmp3
   %call = call i32 @foo(i32 %x)
   ret i32 %call
@@ -245,13 +261,14 @@ define i32 @non_leaf_scs(i32 %x) "branch-protection-pauth-lr" "sign-return-addre
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:  .Ltmp4:
 ; CHECK-NEXT:    paciasp
+; CHECK-NEXT:    .cfi_negate_ra_state_with_pc
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_negate_ra_state
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl foo
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    adr x16, .Ltmp4
+; CHECK-NEXT:    adrp x16, .Ltmp4
+; CHECK-NEXT:    add x16, x16, :lo12:.Ltmp4
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:    autiasp
 ; CHECK-NEXT:    ldr x30, [x18, #-8]!
@@ -263,12 +280,14 @@ define i32 @non_leaf_scs(i32 %x) "branch-protection-pauth-lr" "sign-return-addre
 ; PAUTHLR-NEXT:    .cfi_escape 0x16, 0x12, 0x02, 0x82, 0x78 //
 ; PAUTHLR-NEXT:  .Ltmp4:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    bl foo
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp4
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp4
 ; PAUTHLR-NEXT:    autiasppc .Ltmp4
 ; PAUTHLR-NEXT:    ldr x30, [x18, #-8]!
 ; PAUTHLR-NEXT:    ret
@@ -282,8 +301,9 @@ define i32 @leaf_sign_all_v83(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:  .Ltmp5:
 ; CHECK-NEXT:    paciasp
-; CHECK-NEXT:    .cfi_negate_ra_state
-; CHECK-NEXT:    adr x16, .Ltmp5
+; CHECK-NEXT:    .cfi_negate_ra_state_with_pc
+; CHECK-NEXT:    adrp x16, .Ltmp5
+; CHECK-NEXT:    add x16, x16, :lo12:.Ltmp5
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:    retaa
 ;
@@ -291,7 +311,9 @@ define i32 @leaf_sign_all_v83(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp5:
 ; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp5
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp5
 ; PAUTHLR-NEXT:    retaasppc .Ltmp5
   ret i32 %x
 }
@@ -304,15 +326,16 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "branch-protection-pauth-lr"
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp6:
 ; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; COMPAT-NEXT:    .cfi_negate_ra_state
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    //APP
 ; COMPAT-NEXT:    mov x30, x0
 ; COMPAT-NEXT:    //NO_APP
 ; COMPAT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; COMPAT-NEXT:    adr x16, .Ltmp6
+; COMPAT-NEXT:    adrp x16, .Ltmp6
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp6
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    b bar
@@ -322,15 +345,16 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "branch-protection-pauth-lr"
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp6:
 ; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; V83A-NEXT:    .cfi_negate_ra_state
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    //APP
 ; V83A-NEXT:    mov x30, x0
 ; V83A-NEXT:    //NO_APP
 ; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; V83A-NEXT:    adr x16, .Ltmp6
+; V83A-NEXT:    adrp x16, .Ltmp6
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp6
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    autiasp
 ; V83A-NEXT:    b bar
@@ -339,14 +363,16 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "branch-protection-pauth-lr"
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp6:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    //APP
 ; PAUTHLR-NEXT:    mov x30, x0
 ; PAUTHLR-NEXT:    //NO_APP
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp6
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp6
 ; PAUTHLR-NEXT:    autiasppc .Ltmp6
 ; PAUTHLR-NEXT:    b bar
   call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1
@@ -360,8 +386,9 @@ define i32 @leaf_sign_all_a_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp7:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp7
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp7
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp7
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -371,8 +398,9 @@ define i32 @leaf_sign_all_a_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp7:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp7
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp7
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp7
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -380,7 +408,9 @@ define i32 @leaf_sign_all_a_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp7:
 ; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp7
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp7
 ; PAUTHLR-NEXT:    retaasppc .Ltmp7
   ret i32 %x
 }
@@ -392,8 +422,9 @@ define i32 @leaf_sign_all_b_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp8:
 ; COMPAT-NEXT:    hint #27
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp8
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp8
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp8
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #31
 ; COMPAT-NEXT:    ret
@@ -404,8 +435,9 @@ define i32 @leaf_sign_all_b_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp8:
 ; V83A-NEXT:    pacibsp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp8
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp8
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp8
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retab
 ;
@@ -414,7 +446,9 @@ define i32 @leaf_sign_all_b_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; PAUTHLR-NEXT:    .cfi_b_key_frame
 ; PAUTHLR-NEXT:  .Ltmp8:
 ; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp8
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp8
 ; PAUTHLR-NEXT:    retabsppc .Ltmp8
   ret i32 %x
 }
@@ -426,8 +460,9 @@ define i32 @leaf_sign_all_v83_b_key(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:  .Ltmp9:
 ; CHECK-NEXT:    pacibsp
-; CHECK-NEXT:    .cfi_negate_ra_state
-; CHECK-NEXT:    adr x16, .Ltmp9
+; CHECK-NEXT:    .cfi_negate_ra_state_with_pc
+; CHECK-NEXT:    adrp x16, .Ltmp9
+; CHECK-NEXT:    add x16, x16, :lo12:.Ltmp9
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:    retab
 ;
@@ -436,7 +471,9 @@ define i32 @leaf_sign_all_v83_b_key(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; PAUTHLR-NEXT:    .cfi_b_key_frame
 ; PAUTHLR-NEXT:  .Ltmp9:
 ; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp9
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp9
 ; PAUTHLR-NEXT:    retabsppc .Ltmp9
   ret i32 %x
 }
@@ -449,8 +486,9 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp10:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp10
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp10
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp10
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -461,8 +499,9 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp10:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp10
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp10
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp10
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -471,7 +510,9 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; PAUTHLR-NEXT:    bti c
 ; PAUTHLR-NEXT:  .Ltmp10:
 ; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp10
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp10
 ; PAUTHLR-NEXT:    retaasppc .Ltmp10
   ret i32 %x
 }
@@ -485,8 +526,9 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp11:
 ; COMPAT-NEXT:    hint #27
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp11
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp11
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp11
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #31
 ; COMPAT-NEXT:    ret
@@ -498,8 +540,9 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp11:
 ; V83A-NEXT:    pacibsp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp11
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp11
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp11
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retab
 ;
@@ -509,7 +552,9 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; PAUTHLR-NEXT:    .cfi_b_key_frame
 ; PAUTHLR-NEXT:  .Ltmp11:
 ; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp11
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp11
 ; PAUTHLR-NEXT:    retabsppc .Ltmp11
   ret i32 %x
 }
@@ -523,8 +568,9 @@ define i32 @leaf_sign_all_v83_b_key_bti(i32 %x) "branch-protection-pauth-lr" "si
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:  .Ltmp12:
 ; CHECK-NEXT:    pacibsp
-; CHECK-NEXT:    .cfi_negate_ra_state
-; CHECK-NEXT:    adr x16, .Ltmp12
+; CHECK-NEXT:    .cfi_negate_ra_state_with_pc
+; CHECK-NEXT:    adrp x16, .Ltmp12
+; CHECK-NEXT:    add x16, x16, :lo12:.Ltmp12
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:    retab
 ;
@@ -534,7 +580,9 @@ define i32 @leaf_sign_all_v83_b_key_bti(i32 %x) "branch-protection-pauth-lr" "si
 ; PAUTHLR-NEXT:    .cfi_b_key_frame
 ; PAUTHLR-NEXT:  .Ltmp12:
 ; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp12
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp12
 ; PAUTHLR-NEXT:    retabsppc .Ltmp12
   ret i32 %x
 }
diff --git a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
index a63bb8452ebbe1..d2b063a057139b 100644
--- a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
+++ b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
@@ -12,6 +12,11 @@
   entry:
     ret i32 2
   }
+
+  define dso_local i32 @foobar() "sign-return-address"="all" "branch-protection-pauth-lr"="true" {
+  entry:
+    ret i32 2
+  }
 ...
 ---
 #CHECK: foo
@@ -46,3 +51,21 @@ body:             |
     RET_ReallyLR implicit killed $w0
 
 ...
+---
+#CHECK: foobar
+name:            foobar
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxCallFrameSize: 0
+#CHECK:    frame-setup PACM
+#CHECK:    frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp, pre-instr-symbol <mcsymbol >
+#CHECK:    frame-setup CFI_INSTRUCTION negate_ra_sign_state_with_pc
+#CHECK:    frame-destroy PACM
+#CHECK:    frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp
+body:             |
+  bb.0.entry:
+    $w0 = MOVi32imm 2
+    RET_ReallyLR implicit killed $w0
+
+...
diff --git a/llvm/test/MC/AArch64/directives-case_insensitive.s b/llvm/test/MC/AArch64/directives-case_insensitive.s
index be92e00cfad11a..35a90a1bffea8d 100644
--- a/llvm/test/MC/AArch64/directives-case_insensitive.s
+++ b/llvm/test/MC/AArch64/directives-case_insensitive.s
@@ -32,10 +32,12 @@ fred .REQ x5
 
 .CFI_STARTPROC
 .CFI_NEGATE_RA_STATE
+.CFI_NEGATE_RA_STATE_WITH_PC
 .CFI_B_KEY_FRAME
 .CFI_ENDPROC
 // CHECK: .cfi_startproc
 // CHECK: .cfi_negate_ra_state
+// CHECK: .cfi_negate_ra_state_with_pc
 // CHECK: .cfi_b_key_frame
 // CHECK: .cfi_endproc
 
diff --git a/llvm/test/MC/AArch64/negate_ra_state_with_pc.s b/llvm/test/MC/AArch64/negate_ra_state_with_pc.s
new file mode 100644
index 00000000000000..44b8ab2df9a908
--- /dev/null
+++ b/llvm/test/MC/AArch64/negate_ra_state_with_pc.s
@@ -0,0 +1,7 @@
+//RUN: llvm-mc  -triple=aarch64-arm-none-eabi -o - %s | FileCheck %s
+
+// CHECK: .cfi_negate_ra_state_with_pc
+foo:
+  .cfi_startproc
+  .cfi_negate_ra_state_with_pc
+  .cfi_endproc
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp
index 17fb18fc6b4d24..2be656547c92e0 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp
@@ -174,6 +174,7 @@ TEST(DWARFDebugFrame, InvalidCFIOpcodesTest) {
       dwarf::DW_CFA_MIPS_advance_loc8,
       dwarf::DW_CFA_GNU_window_save,
       dwarf::DW_CFA_AARCH64_negate_ra_state,
+      dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc,
       dwarf::DW_CFA_GNU_args_size};
 
   dwarf::CIE TestCIE = createCIE(/*IsDWARF64=*/false,

>From dc4674c8164e1527bf4ec183ca7d14ca9be35737 Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Wed, 9 Oct 2024 10:38:03 +0100
Subject: [PATCH 3/6] [PAuthLR] Make CFI Instruction Location Consistent

With the introduction of `.cfi_negate_ra_state_with_pc`, the locations of the two
CFI instructions are inconsistent: `.cfi_negate_ra_state_with_pc` needs to be
emitted directly after the signing instruction, whereas `.cfi_negate_ra_state` is
emitted with the other CFI instructions as part of the prolog.

To ensure consistency between the two CFI instructions, they are now both emitted
directly after the signing instruction, regardless of which one is being used. This
reverts the changes made in #96377, as that is the commit that originally moved
the CFI instructions.
---
 .../lib/Target/AArch64/AArch64PointerAuth.cpp |  18 --
 .../machine-outliner-retaddr-sign-cfi.ll      |   3 +-
 ...tliner-retaddr-sign-diff-scope-same-key.ll |   6 +-
 .../machine-outliner-retaddr-sign-non-leaf.ll | 121 +++++++++++--
 .../machine-outliner-retaddr-sign-regsave.mir |   3 +-
 ...tliner-retaddr-sign-same-scope-diff-key.ll | 139 +++++++++++---
 ...machine-outliner-retaddr-sign-subtarget.ll |   9 +-
 .../machine-outliner-retaddr-sign-thunk.ll    | 169 ++++++++++++++----
 .../AArch64/pacbti-llvm-generated-funcs-2.ll  |   3 +-
 ...sign-return-address-cfi-negate-ra-state.ll |   8 +-
 .../CodeGen/AArch64/sign-return-address.ll    |  18 +-
 11 files changed, 370 insertions(+), 127 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index 0879b5aec37aea..c3ad488fb8e4d1 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -114,29 +114,11 @@ static void emitPACCFI(const AArch64Subtarget &Subtarget,
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   auto &MF = *MBB.getParent();
   auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
-  bool EmitAsyncCFI = MFnI.needsAsyncDwarfUnwindInfo(MF);
 
   auto CFIInst = MFnI.branchProtectionPAuthLR()
                      ? MCCFIInstruction::createNegateRAStateWithPC(nullptr)
                      : MCCFIInstruction::createNegateRAState(nullptr);
 
-  // Because of PAuthLR, when using NegateRAStateWithPC, the CFI instruction cannot
-  // be bundled with other CFI instructions in the prolog, as it needs to directly
-  // follow the signing instruction. This ensures the PC value is captured incase of
-  // an error in the following the following instructions.
-  if (!EmitAsyncCFI && !(MFnI.branchProtectionPAuthLR())) {
-    // Reduce the size of the generated call frame information for synchronous
-    // CFI by bundling the new CFI instruction with others in the prolog, so
-    // that no additional DW_CFA_advance_loc is needed.
-    for (auto I = MBBI; I != MBB.end(); ++I) {
-      if (I->getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
-          I->getFlag(MachineInstr::FrameSetup)) {
-        MBBI = I;
-        break;
-      }
-    }
-  }
-
   unsigned CFIIndex = MF.addFrameInst(CFIInst);
   BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
       .addCFIIndex(CFIIndex)
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll
index c64b3842aa5baa..4bbbe40176313a 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll
@@ -11,8 +11,7 @@ define void @a() "sign-return-address"="all" "sign-return-address-key"="b_key" {
 ; CHECK-NEXT:          .cfi_b_key_frame
 ; V8A-NEXT:            hint #27
 ; V83A-NEXT:           pacibsp
-; CHECK:               .cfi_negate_ra_state
-; CHECK-NEXT:          .cfi_def_cfa_offset
+; CHECK-NEXT:          .cfi_negate_ra_state
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll
index 3221815da33c5e..6a11bef08c7406 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll
@@ -7,8 +7,7 @@ define void @a() "sign-return-address"="all" {
 ; CHECK-LABEL:      a:                                     // @a
 ; V8A:              hint #25
 ; V83A:             paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
+; CHECK-NEXT:      .cfi_negate_ra_state
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -55,8 +54,7 @@ define void @c() "sign-return-address"="all" {
 ; CHECK-LABEL:         c:              // @c
 ; V8A:                 hint #25
 ; V83A:                paciasp
-; CHECK:              .cfi_negate_ra_state
-; CHECK-NEXT:         .cfi_def_cfa_offset
+; CHECK-NEXT:         .cfi_negate_ra_state
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll
index d43b74b9451aae..1e7224683c6c89 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll
@@ -1,15 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 %s -o - | \
 ; RUN:   FileCheck %s --check-prefixes CHECK,V8A
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 -mattr=+v8.3a %s -o - | \
 ; RUN:   FileCheck %s --check-prefixes CHECK,V83A
 
 define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" {
-; CHECK-LABEL:      a:                                     // @a
-; CHECK:                .cfi_b_key_frame
-; V8A-NEXT:             hint #27
-; V83A-NEXT:            pacibsp
-; CHECK:                .cfi_negate_ra_state
-; CHECK-NEXT:           .cfi_def_cfa_offset
+; V8A-LABEL: a:
+; V8A:       // %bb.0:
+; V8A-NEXT:    .cfi_b_key_frame
+; V8A-NEXT:    hint #27
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    bl OUTLINED_FUNCTION_0
+; V8A-NEXT:    //APP
+; V8A-NEXT:    mov x30, x0
+; V8A-NEXT:    //NO_APP
+; V8A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #31
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: a:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    bl OUTLINED_FUNCTION_0
+; V83A-NEXT:    //APP
+; V83A-NEXT:    mov x30, x0
+; V83A-NEXT:    //NO_APP
+; V83A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retab
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -27,12 +56,40 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"
 }
 
 define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" {
-; CHECK-LABEL:      b:                                     // @b
-; CHECK:                .cfi_b_key_frame
-; V8A-NEXT:             hint #27
-; V83A-NEXT:            pacibsp
-; CHECK:                .cfi_negate_ra_state
-; CHECK-NEXT:           .cfi_def_cfa_offset
+; V8A-LABEL: b:
+; V8A:       // %bb.0:
+; V8A-NEXT:    .cfi_b_key_frame
+; V8A-NEXT:    hint #27
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    bl OUTLINED_FUNCTION_0
+; V8A-NEXT:    //APP
+; V8A-NEXT:    mov x30, x0
+; V8A-NEXT:    //NO_APP
+; V8A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #31
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: b:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    bl OUTLINED_FUNCTION_0
+; V83A-NEXT:    //APP
+; V83A-NEXT:    mov x30, x0
+; V83A-NEXT:    //NO_APP
+; V83A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retab
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -50,12 +107,40 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"
 }
 
 define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" {
-; CHECK-LABEL:      c:                                     // @c
-; CHECK:                .cfi_b_key_frame
-; V8A-NEXT:             hint #27
-; V83A-NEXT:            pacibsp
-; CHECK:                .cfi_negate_ra_state
-; CHECK-NEXT:           .cfi_def_cfa_offset
+; V8A-LABEL: c:
+; V8A:       // %bb.0:
+; V8A-NEXT:    .cfi_b_key_frame
+; V8A-NEXT:    hint #27
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    bl OUTLINED_FUNCTION_0
+; V8A-NEXT:    //APP
+; V8A-NEXT:    mov x30, x0
+; V8A-NEXT:    //NO_APP
+; V8A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #31
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: c:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    bl OUTLINED_FUNCTION_0
+; V83A-NEXT:    //APP
+; V83A-NEXT:    mov x30, x0
+; V83A-NEXT:    //NO_APP
+; V83A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retab
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir
index ba27d1c681e3f4..9a983cbd6714ee 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir
@@ -82,8 +82,7 @@ body:             |
 # CHECK:          bb.0:
 # CHECK:            frame-setup EMITBKEY
 # CHECK-NEXT:       frame-setup PACIBSP implicit-def $lr, implicit $lr, implicit $sp
-# CHECK:            frame-setup CFI_INSTRUCTION negate_ra_sign_state
-# CHECK-NEXT:       frame-setup CFI_INSTRUCTION
+# CHECK-NEXT:       frame-setup CFI_INSTRUCTION negate_ra_sign_state
 # CHECK-NOT:        OUTLINED_FUNCTION_
 # CHECK:          bb.1:
 # CHECK-NOT:        OUTLINED_FUNCTION_
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll
index 8c36ab4d8f403a..87771f5de4f699 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll
@@ -1,14 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 %s -o - | \
 ; RUN:   FileCheck %s --check-prefixes CHECK,V8A
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 -mattr=+v8.3a %s -o - | \
 ; RUN:   FileCheck %s --check-prefixes CHECK,V83A
 
 define void @a() "sign-return-address"="all" {
-; CHECK-LABEL:      a:                                     // @a
-; V8A:              hint #25
-; V83A:             paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
+; V8A-LABEL: a:
+; V8A:       // %bb.0:
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    mov w8, #1 // =0x1
+; V8A-NEXT:    mov w9, #2 // =0x2
+; V8A-NEXT:    stp w9, w8, [sp, #24]
+; V8A-NEXT:    mov w9, #3 // =0x3
+; V8A-NEXT:    mov w8, #4 // =0x4
+; V8A-NEXT:    stp w8, w9, [sp, #16]
+; V8A-NEXT:    mov w9, #5 // =0x5
+; V8A-NEXT:    mov w8, #6 // =0x6
+; V8A-NEXT:    stp w8, w9, [sp, #8]
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: a:
+; V83A:       // %bb.0:
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    mov w8, #1 // =0x1
+; V83A-NEXT:    mov w9, #2 // =0x2
+; V83A-NEXT:    stp w9, w8, [sp, #24]
+; V83A-NEXT:    mov w9, #3 // =0x3
+; V83A-NEXT:    mov w8, #4 // =0x4
+; V83A-NEXT:    stp w8, w9, [sp, #16]
+; V83A-NEXT:    mov w9, #5 // =0x5
+; V83A-NEXT:    mov w8, #6 // =0x6
+; V83A-NEXT:    stp w8, w9, [sp, #8]
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retaa
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -21,19 +53,48 @@ define void @a() "sign-return-address"="all" {
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
   store i32 6, ptr %6, align 4
-; V8A:            hint #29
-; V83A:           retaa
   ret void
-; CHECK:          .cfi_endproc
 }
 
 define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" {
-; CHECK-LABEL:      b:                                     // @b
-; CHECK:            .cfi_b_key_frame
-; V8A-NEXT:         hint #27
-; V83A-NEXT:        pacibsp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
+; V8A-LABEL: b:
+; V8A:       // %bb.0:
+; V8A-NEXT:    .cfi_b_key_frame
+; V8A-NEXT:    hint #27
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    mov w8, #1 // =0x1
+; V8A-NEXT:    mov w9, #2 // =0x2
+; V8A-NEXT:    stp w9, w8, [sp, #24]
+; V8A-NEXT:    mov w9, #3 // =0x3
+; V8A-NEXT:    mov w8, #4 // =0x4
+; V8A-NEXT:    stp w8, w9, [sp, #16]
+; V8A-NEXT:    mov w9, #5 // =0x5
+; V8A-NEXT:    mov w8, #6 // =0x6
+; V8A-NEXT:    stp w8, w9, [sp, #8]
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #31
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: b:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    mov w8, #1 // =0x1
+; V83A-NEXT:    mov w9, #2 // =0x2
+; V83A-NEXT:    stp w9, w8, [sp, #24]
+; V83A-NEXT:    mov w9, #3 // =0x3
+; V83A-NEXT:    mov w8, #4 // =0x4
+; V83A-NEXT:    stp w8, w9, [sp, #16]
+; V83A-NEXT:    mov w9, #5 // =0x5
+; V83A-NEXT:    mov w8, #6 // =0x6
+; V83A-NEXT:    stp w8, w9, [sp, #8]
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retab
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -46,19 +107,46 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" {
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
   store i32 6, ptr %6, align 4
-; V8A-NOT:          hint #29
-; V83A-NOT:         autiasp
-; V83A-NOT:         retaa
   ret void
-; CHECK:            .cfi_endproc
 }
 
 define void @c() "sign-return-address"="all" {
-; CHECK-LABEL:      c:                                     // @c
-; V8A:              hint #25
-; V83A:             paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
+; V8A-LABEL: c:
+; V8A:       // %bb.0:
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    mov w8, #1 // =0x1
+; V8A-NEXT:    mov w9, #2 // =0x2
+; V8A-NEXT:    stp w9, w8, [sp, #24]
+; V8A-NEXT:    mov w9, #3 // =0x3
+; V8A-NEXT:    mov w8, #4 // =0x4
+; V8A-NEXT:    stp w8, w9, [sp, #16]
+; V8A-NEXT:    mov w9, #5 // =0x5
+; V8A-NEXT:    mov w8, #6 // =0x6
+; V8A-NEXT:    stp w8, w9, [sp, #8]
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: c:
+; V83A:       // %bb.0:
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    mov w8, #1 // =0x1
+; V83A-NEXT:    mov w9, #2 // =0x2
+; V83A-NEXT:    stp w9, w8, [sp, #24]
+; V83A-NEXT:    mov w9, #3 // =0x3
+; V83A-NEXT:    mov w8, #4 // =0x4
+; V83A-NEXT:    stp w8, w9, [sp, #16]
+; V83A-NEXT:    mov w9, #5 // =0x5
+; V83A-NEXT:    mov w8, #6 // =0x6
+; V83A-NEXT:    stp w8, w9, [sp, #8]
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retaa
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -71,11 +159,10 @@ define void @c() "sign-return-address"="all" {
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
   store i32 6, ptr %6, align 4
-; V8A:            hint #29
-; V83A:           retaa
   ret void
-; CHECK:          .cfi_endproc
 }
 
 ; CHECK-NOT:      OUTLINED_FUNCTION_0:
 ; CHECK-NOT:      // -- Begin function
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll
index d5ef94e900993c..a7ea32952f3b78 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll
@@ -10,8 +10,7 @@ define void @a() #0 {
 ; CHECK:            // %bb.0:
 ; CHECK-NEXT:               .cfi_b_key_frame
 ; CHECK-NEXT:               pacibsp
-; CHECK:                    .cfi_negate_ra_state
-; CHECK-NEXT:               .cfi_def_cfa_offset
+; CHECK-NEXT:               .cfi_negate_ra_state
 ; CHECK-NOT:                OUTLINED_FUNCTION_
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
@@ -35,8 +34,7 @@ define void @b() #0 {
 ; CHECK:            // %bb.0:
 ; CHECK-NEXT:               .cfi_b_key_frame
 ; CHECK-NEXT:               pacibsp
-; CHECK:                    .cfi_negate_ra_state
-; CHECK-NEXT:               .cfi_def_cfa_offset
+; CHECK-NEXT:               .cfi_negate_ra_state
 ; CHECK-NOT:                OUTLINED_FUNCTION_
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
@@ -60,8 +58,7 @@ define void @c() #1 {
 ; CHECK:            // %bb.0:
 ; CHECK-NEXT:               .cfi_b_key_frame
 ; CHECK-NEXT:               hint #27
-; CHECK:                    .cfi_negate_ra_state
-; CHECK-NEXT:               .cfi_def_cfa_offset
+; CHECK-NEXT:               .cfi_negate_ra_state
 ; CHECK-NOT:                OUTLINED_FUNCTION_
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
index 3e361111b54553..da68ea5bf0dbcb 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner -outliner-leaf-descendants=false \
 ; RUN:   -verify-machineinstrs %s -o - | FileCheck --check-prefixes CHECK,V8A %s
 ; RUN: llc -mtriple aarch64 -enable-machine-outliner -outliner-leaf-descendants=false \
@@ -7,15 +8,38 @@
 declare i32 @thunk_called_fn(i32, i32, i32, i32)
 
 define i32 @a() #0 {
-; CHECK-LABEL:  a:                                      // @a
-; CHECK:        // %bb.0:                               // %entry
-; V8A-NEXT:         hint #25
-; V83A-NEXT:        paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
-; V8A:              hint #29
-; V8A-NEXT:         ret
-; V83A:             retaa
+; V8A-LABEL: a:
+; V8A:       // %bb.0: // %entry
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 16
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    mov w0, #1 // =0x1
+; V8A-NEXT:    mov w1, #2 // =0x2
+; V8A-NEXT:    mov w2, #3 // =0x3
+; V8A-NEXT:    mov w3, #4 // =0x4
+; V8A-NEXT:    bl thunk_called_fn
+; V8A-NEXT:    add w0, w0, #8
+; V8A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: a:
+; V83A:       // %bb.0: // %entry
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    bl thunk_called_fn
+; V83A-NEXT:    add w0, w0, #8
+; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V83A-NEXT:    retaa
 entry:
   %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
   %cx = add i32 %call, 8
@@ -23,15 +47,38 @@ entry:
 }
 
 define i32 @b() #0 {
-; CHECK-LABEL:  b:                                      // @b
-; CHECK:        // %bb.0:                               // %entry
-; V8A-NEXT:         hint #25
-; V83A-NEXT:        paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
-; V8A:              hint #29
-; V8A-NEXT:         ret
-; V83A:             retaa
+; V8A-LABEL: b:
+; V8A:       // %bb.0: // %entry
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 16
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    mov w0, #1 // =0x1
+; V8A-NEXT:    mov w1, #2 // =0x2
+; V8A-NEXT:    mov w2, #3 // =0x3
+; V8A-NEXT:    mov w3, #4 // =0x4
+; V8A-NEXT:    bl thunk_called_fn
+; V8A-NEXT:    add w0, w0, #88
+; V8A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: b:
+; V83A:       // %bb.0: // %entry
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    bl thunk_called_fn
+; V83A-NEXT:    add w0, w0, #88
+; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V83A-NEXT:    retaa
 entry:
   %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
   %cx = add i32 %call, 88
@@ -39,15 +86,40 @@ entry:
 }
 
 define hidden i32 @c(ptr %fptr) #0 {
-; CHECK-LABEL:  c:                                      // @c
-; CHECK:        // %bb.0:                               // %entry
-; V8A-NEXT:         hint #25
-; V83A-NEXT:        paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
-; V8A:              hint #29
-; V8A-NEXT:         ret
-; V83A:             retaa
+; V8A-LABEL: c:
+; V8A:       // %bb.0: // %entry
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 16
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    mov x8, x0
+; V8A-NEXT:    mov w0, #1 // =0x1
+; V8A-NEXT:    mov w1, #2 // =0x2
+; V8A-NEXT:    mov w2, #3 // =0x3
+; V8A-NEXT:    mov w3, #4 // =0x4
+; V8A-NEXT:    blr x8
+; V8A-NEXT:    add w0, w0, #8
+; V8A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: c:
+; V83A:       // %bb.0: // %entry
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    mov x8, x0
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    blr x8
+; V83A-NEXT:    add w0, w0, #8
+; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V83A-NEXT:    retaa
 entry:
   %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4)
   %add = add nsw i32 %call, 8
@@ -55,15 +127,40 @@ entry:
 }
 
 define hidden i32 @d(ptr %fptr) #0 {
-; CHECK-LABEL:  d:                                      // @d
-; CHECK:        // %bb.0:                               // %entry
-; V8A-NEXT:         hint #25
-; V83A-NEXT:        paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
-; V8A:              hint #29
-; V8A-NEXT:         ret
-; V83A:             retaa
+; V8A-LABEL: d:
+; V8A:       // %bb.0: // %entry
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 16
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    mov x8, x0
+; V8A-NEXT:    mov w0, #1 // =0x1
+; V8A-NEXT:    mov w1, #2 // =0x2
+; V8A-NEXT:    mov w2, #3 // =0x3
+; V8A-NEXT:    mov w3, #4 // =0x4
+; V8A-NEXT:    blr x8
+; V8A-NEXT:    add w0, w0, #88
+; V8A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: d:
+; V83A:       // %bb.0: // %entry
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    mov x8, x0
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    blr x8
+; V83A-NEXT:    add w0, w0, #88
+; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V83A-NEXT:    retaa
 entry:
   %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4)
   %add = add nsw i32 %call, 88
diff --git a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll
index 0969ec246399fe..373c4969a9405c 100644
--- a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll
+++ b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll
@@ -35,8 +35,7 @@ entry:
 ;; CHECK-LABEL: __llvm_gcov_writeout:
 ;; CHECK:       .cfi_b_key_frame
 ;; CHECK-NEXT:  pacibsp
-;; CHECK:       .cfi_negate_ra_state
-;; CHECK-NEXT:  .cfi_def_cfa_offset
+;; CHECK-NEXT:  .cfi_negate_ra_state
 
 define internal void @__llvm_gcov_reset() unnamed_addr #2 {
 entry:
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
index fbf571eabd8015..4d4b7c215b978a 100644
--- a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
+++ b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
@@ -10,8 +10,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 {
 ; CHECK-V8A-LABEL: _Z3fooi:
 ; CHECK-V8A:       // %bb.0: // %entry
 ; CHECK-V8A-NEXT:    hint #25
-; CHECK-V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V8A-NEXT:    .cfi_negate_ra_state
+; CHECK-V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V8A-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-V8A-NEXT:    .cfi_offset w30, -16
 ; CHECK-V8A-NEXT:    str w0, [sp, #8]
@@ -28,8 +28,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 {
 ; CHECK-V83A-LABEL: _Z3fooi:
 ; CHECK-V83A:       // %bb.0: // %entry
 ; CHECK-V83A-NEXT:    paciasp
-; CHECK-V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V83A-NEXT:    .cfi_negate_ra_state
+; CHECK-V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V83A-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-V83A-NEXT:    .cfi_offset w30, -16
 ; CHECK-V83A-NEXT:    str w0, [sp, #8]
@@ -144,8 +144,8 @@ define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) {
 ; CHECK-V8A-LABEL: baz_sync:
 ; CHECK-V8A:       // %bb.0: // %entry
 ; CHECK-V8A-NEXT:    hint #25
-; CHECK-V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V8A-NEXT:    .cfi_negate_ra_state
+; CHECK-V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V8A-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-V8A-NEXT:    .cfi_offset w30, -16
 ; CHECK-V8A-NEXT:    cbz w0, .LBB2_2
@@ -165,8 +165,8 @@ define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) {
 ; CHECK-V83A-LABEL: baz_sync:
 ; CHECK-V83A:       // %bb.0: // %entry
 ; CHECK-V83A-NEXT:    paciasp
-; CHECK-V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V83A-NEXT:    .cfi_negate_ra_state
+; CHECK-V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V83A-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-V83A-NEXT:    .cfi_offset w30, -16
 ; CHECK-V83A-NEXT:    cbz w0, .LBB2_2
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address.ll b/llvm/test/CodeGen/AArch64/sign-return-address.ll
index c33463eb96a687..dafe0d71ceb5f7 100644
--- a/llvm/test/CodeGen/AArch64/sign-return-address.ll
+++ b/llvm/test/CodeGen/AArch64/sign-return-address.ll
@@ -46,8 +46,8 @@ define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf"  {
 ; COMPAT-LABEL: leaf_clobbers_lr:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_negate_ra_state
+; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    //APP
@@ -60,8 +60,8 @@ define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf"  {
 ; V83A-LABEL: leaf_clobbers_lr:
 ; V83A:       // %bb.0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    //APP
@@ -79,8 +79,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" {
 ; COMPAT-LABEL: non_leaf_sign_all:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_negate_ra_state
+; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    bl foo
@@ -91,8 +91,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" {
 ; V83A-LABEL: non_leaf_sign_all:
 ; V83A:       // %bb.0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    bl foo
@@ -106,8 +106,8 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf"  {
 ; COMPAT-LABEL: non_leaf_sign_non_leaf:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_negate_ra_state
+; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    bl foo
@@ -118,8 +118,8 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf"  {
 ; V83A-LABEL: non_leaf_sign_non_leaf:
 ; V83A:       // %bb.0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    bl foo
@@ -136,8 +136,8 @@ define i32 @non_leaf_scs(i32 %x) "sign-return-address"="non-leaf" shadowcallstac
 ; CHECK-NEXT:    str x30, [x18], #8
 ; CHECK-NEXT:    .cfi_escape 0x16, 0x12, 0x02, 0x82, 0x78 //
 ; CHECK-NEXT:    paciasp
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_negate_ra_state
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl foo
@@ -164,8 +164,8 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" {
 ; COMPAT-LABEL: spill_lr_and_tail_call:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_negate_ra_state
+; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    //APP
@@ -178,8 +178,8 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" {
 ; V83A-LABEL: spill_lr_and_tail_call:
 ; V83A:       // %bb.0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    //APP

>From 6f5bcf81ab29b433c32f67e1af50a243f0c08ce6 Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Mon, 14 Oct 2024 10:07:40 +0100
Subject: [PATCH 4/6] fixup! Add support for FEAT_PAuth_LR to libunwind

Formatting fixes
---
 libunwind/src/DwarfInstructions.hpp | 15 +++++---
 libunwind/src/DwarfParser.hpp       | 23 ++++++------
 libunwind/src/dwarf2.h              | 57 ++++++++++++++---------------
 3 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/libunwind/src/DwarfInstructions.hpp b/libunwind/src/DwarfInstructions.hpp
index e7c467de80adb6..e7be0d6d5d6354 100644
--- a/libunwind/src/DwarfInstructions.hpp
+++ b/libunwind/src/DwarfInstructions.hpp
@@ -318,14 +318,19 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
         // We use the hint versions of the authentication instructions below to
         // ensure they're assembled by the compiler even for targets with no
         // FEAT_PAuth/FEAT_PAuth_LR support.
-        if(isReturnAddressSignedWithPC(addressSpace, registers, cfa, prolog)) {
-          register unsigned long long x15 __asm("x15") = prolog.ptrAuthDiversifier;
-          if(cieInfo.addressesSignedWithBKey) {
+        if (isReturnAddressSignedWithPC(addressSpace, registers, cfa, prolog)) {
+          register unsigned long long x15 __asm("x15") =
+              prolog.ptrAuthDiversifier;
+          if (cieInfo.addressesSignedWithBKey) {
             asm("hint 0x27\n\t" // pacm
-                "hint 0xe" : "+r"(x17) : "r"(x16),  "r"(x15)); // autib1716
+                "hint 0xe"
+                : "+r"(x17)
+                : "r"(x16), "r"(x15)); // autib1716
           } else {
             asm("hint 0x27\n\t" // pacm
-                "hint 0xc" : "+r"(x17) : "r"(x16), "r"(x15)); // autia1716
+                "hint 0xc"
+                : "+r"(x17)
+                : "r"(x16), "r"(x15)); // autia1716
           }
         } else {
           if (cieInfo.addressesSignedWithBKey)
diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp
index b104d773ed4440..3b099af2eaed36 100644
--- a/libunwind/src/DwarfParser.hpp
+++ b/libunwind/src/DwarfParser.hpp
@@ -91,9 +91,9 @@ class CFI_Parser {
     int64_t           cfaExpression;      // CFA = expression
     uint32_t          spExtraArgSize;
     RegisterLocation  savedRegisters[kMaxRegisterNumber + 1];
-    #if defined(_LIBUNWIND_TARGET_AARCH64)
-    pint_t            ptrAuthDiversifier;
-    #endif
+#if defined(_LIBUNWIND_TARGET_AARCH64)
+    pint_t ptrAuthDiversifier;
+#endif
     enum class InitializeTime { kLazy, kNormal };
 
     // When saving registers, this data structure is lazily initialized.
@@ -808,15 +808,16 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
             results->savedRegisters[UNW_AARCH64_RA_SIGN_STATE].value ^ 0x3;
         results->setRegisterValue(UNW_AARCH64_RA_SIGN_STATE, value,
                                   initialState);
-        // When calucating the value of the PC, it is assumed that the CFI instruction
-        // is placed before the signing instruction, however it is placed after. Because
-        // of this, we need to take into account the CFI instruction is one instruction
-        // call later than expected, and reduce the PC value by 4 bytes to compensate.
+        // When calculating the value of the PC, it is assumed that the CFI
+        // instruction is placed before the signing instruction, however it is
+        // placed after. Because of this, we need to take into account the CFI
+        // instruction is one instruction later than expected, and reduce
+        // the PC value by 4 bytes to compensate.
         results->ptrAuthDiversifier = fdeInfo.pcStart + codeOffset - 0x4;
-        _LIBUNWIND_TRACE_DWARF("DW_CFA_AARCH64_negate_ra_state_with_pc(pc=0x%" PRIx64 ")\n",
-                               static_cast<uint64_t>(results->ptrAuthDiversifier));
-      }
-      break;
+        _LIBUNWIND_TRACE_DWARF(
+            "DW_CFA_AARCH64_negate_ra_state_with_pc(pc=0x%" PRIx64 ")\n",
+            static_cast<uint64_t>(results->ptrAuthDiversifier));
+      } break;
 #endif
 
 #else
diff --git a/libunwind/src/dwarf2.h b/libunwind/src/dwarf2.h
index 2ad3d3c464e80d..68ad882347203c 100644
--- a/libunwind/src/dwarf2.h
+++ b/libunwind/src/dwarf2.h
@@ -18,44 +18,43 @@
 
 // DWARF unwind instructions
 enum {
-  DW_CFA_nop                 = 0x0,
-  DW_CFA_set_loc             = 0x1,
-  DW_CFA_advance_loc1        = 0x2,
-  DW_CFA_advance_loc2        = 0x3,
-  DW_CFA_advance_loc4        = 0x4,
-  DW_CFA_offset_extended     = 0x5,
-  DW_CFA_restore_extended    = 0x6,
-  DW_CFA_undefined           = 0x7,
-  DW_CFA_same_value          = 0x8,
-  DW_CFA_register            = 0x9,
-  DW_CFA_remember_state      = 0xA,
-  DW_CFA_restore_state       = 0xB,
-  DW_CFA_def_cfa             = 0xC,
-  DW_CFA_def_cfa_register    = 0xD,
-  DW_CFA_def_cfa_offset      = 0xE,
-  DW_CFA_def_cfa_expression  = 0xF,
-  DW_CFA_expression         = 0x10,
+  DW_CFA_nop = 0x0,
+  DW_CFA_set_loc = 0x1,
+  DW_CFA_advance_loc1 = 0x2,
+  DW_CFA_advance_loc2 = 0x3,
+  DW_CFA_advance_loc4 = 0x4,
+  DW_CFA_offset_extended = 0x5,
+  DW_CFA_restore_extended = 0x6,
+  DW_CFA_undefined = 0x7,
+  DW_CFA_same_value = 0x8,
+  DW_CFA_register = 0x9,
+  DW_CFA_remember_state = 0xA,
+  DW_CFA_restore_state = 0xB,
+  DW_CFA_def_cfa = 0xC,
+  DW_CFA_def_cfa_register = 0xD,
+  DW_CFA_def_cfa_offset = 0xE,
+  DW_CFA_def_cfa_expression = 0xF,
+  DW_CFA_expression = 0x10,
   DW_CFA_offset_extended_sf = 0x11,
-  DW_CFA_def_cfa_sf         = 0x12,
-  DW_CFA_def_cfa_offset_sf  = 0x13,
-  DW_CFA_val_offset         = 0x14,
-  DW_CFA_val_offset_sf      = 0x15,
-  DW_CFA_val_expression     = 0x16,
-  DW_CFA_advance_loc        = 0x40, // high 2 bits are 0x1, lower 6 bits are delta
-  DW_CFA_offset             = 0x80, // high 2 bits are 0x2, lower 6 bits are register
-  DW_CFA_restore            = 0xC0, // high 2 bits are 0x3, lower 6 bits are register
+  DW_CFA_def_cfa_sf = 0x12,
+  DW_CFA_def_cfa_offset_sf = 0x13,
+  DW_CFA_val_offset = 0x14,
+  DW_CFA_val_offset_sf = 0x15,
+  DW_CFA_val_expression = 0x16,
+  DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta
+  DW_CFA_offset = 0x80,      // high 2 bits are 0x2, lower 6 bits are register
+  DW_CFA_restore = 0xC0,     // high 2 bits are 0x3, lower 6 bits are register
 
   // GNU extensions
-  DW_CFA_GNU_window_save              = 0x2D,
-  DW_CFA_GNU_args_size                = 0x2E,
+  DW_CFA_GNU_window_save = 0x2D,
+  DW_CFA_GNU_args_size = 0x2E,
   DW_CFA_GNU_negative_offset_extended = 0x2F,
 
   // AARCH64 extensions
   DW_CFA_AARCH64_negate_ra_state_with_pc = 0x2C,
-  DW_CFA_AARCH64_negate_ra_state         = 0x2D
+  DW_CFA_AARCH64_negate_ra_state = 0x2D
 };
 
-
 // FSF exception handling Pointer-Encoding constants
 // Used in CFI augmentation by GCC
 enum {

>From 1aed342ce7202090b6f02139532bba37ae09a449 Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Mon, 14 Oct 2024 10:08:36 +0100
Subject: [PATCH 5/6] fixup! [PAuthLR] Add support for FEAT_PAuth_LR's DWARF
 frame instruction

Formatting Fixes
---
 llvm/include/llvm/MC/MCDwarf.h                |  1 -
 llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp  | 34 +++++++++----------
 .../lib/Target/AArch64/AArch64PointerAuth.cpp |  6 ++--
 3 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index 2ceea906ea57a7..1392336968e74a 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -649,7 +649,6 @@ class MCCFIInstruction {
     return MCCFIInstruction(OpNegateRAStateWithPC, L, 0, INT64_C(0), Loc);
   }
 
-
   /// .cfi_restore says that the rule for Register is now the same as it
   /// was at the beginning of the function, after all initial instructions added
   /// by .cfi_startproc were executed.
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 38e264f233e39b..96cb86ad4c3711 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -668,26 +668,26 @@ Error UnwindTable::parseRows(const CFIProgram &CFIP, UnwindRow &Row,
       break;
 
     case dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc: {
-        constexpr uint32_t AArch64DWARFPAuthRaState = 34;
-        auto LRLoc = Row.getRegisterLocations().getRegisterLocation(
-            AArch64DWARFPAuthRaState);
-        if (LRLoc) {
-          if (LRLoc->getLocation() == UnwindLocation::Constant) {
-            // Toggle the constant value of bits[1:0] from 0 to 1 or 1 to 0.
-            LRLoc->setConstant(LRLoc->getConstant() ^ 0x3);
-          } else {
-            return createStringError(
-                errc::invalid_argument,
-                "%s encountered when existing rule for this register is not "
-                "a constant",
-                CFIP.callFrameString(Inst.Opcode).str().c_str());
-          }
+      constexpr uint32_t AArch64DWARFPAuthRaState = 34;
+      auto LRLoc = Row.getRegisterLocations().getRegisterLocation(
+          AArch64DWARFPAuthRaState);
+      if (LRLoc) {
+        if (LRLoc->getLocation() == UnwindLocation::Constant) {
+          // Toggle the constant value of bits[1:0] from 0 to 1 or 1 to 0.
+          LRLoc->setConstant(LRLoc->getConstant() ^ 0x3);
         } else {
-          Row.getRegisterLocations().setRegisterLocation(
-              AArch64DWARFPAuthRaState, UnwindLocation::createIsConstant(0x3));
+          return createStringError(
+              errc::invalid_argument,
+              "%s encountered when existing rule for this register is not "
+              "a constant",
+              CFIP.callFrameString(Inst.Opcode).str().c_str());
         }
-        break;
+      } else {
+        Row.getRegisterLocations().setRegisterLocation(
+            AArch64DWARFPAuthRaState, UnwindLocation::createIsConstant(0x3));
       }
+      break;
+    }
 
     case dwarf::DW_CFA_undefined: {
       llvm::Expected<uint64_t> RegNum = Inst.getOperandAsUnsigned(CFIP, 0);
diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index c3ad488fb8e4d1..2e5688cf60027a 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -224,13 +224,15 @@ void AArch64PointerAuth::authenticateLR(
               TII->get(UseBKey ? AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi))
           .addSym(PACSym)
           .setMIFlag(MachineInstr::FrameDestroy);
-      emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, EmitAsyncCFI);
+      emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy,
+                 EmitAsyncCFI);
     } else {
       BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, PACSym);
       BuildMI(MBB, MBBI, DL,
               TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
           .setMIFlag(MachineInstr::FrameDestroy);
-      emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, EmitAsyncCFI);
+      emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy,
+                 EmitAsyncCFI);
     }
 
     if (NeedsWinCFI) {

>From d93e91dcde2375df32f132a6420e8f2877483d1e Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Wed, 16 Oct 2024 09:06:43 +0100
Subject: [PATCH 6/6] fixup! [PAuthLR] Add support for FEAT_PAuth_LR to
 libunwind

---
 libunwind/src/DwarfParser.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp
index 3b099af2eaed36..7e85025dd054d5 100644
--- a/libunwind/src/DwarfParser.hpp
+++ b/libunwind/src/DwarfParser.hpp
@@ -808,7 +808,7 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
             results->savedRegisters[UNW_AARCH64_RA_SIGN_STATE].value ^ 0x3;
         results->setRegisterValue(UNW_AARCH64_RA_SIGN_STATE, value,
                                   initialState);
-        // When calucating the value of the PC, it is assumed that the CFI
+        // When calculating the value of the PC, it is assumed that the CFI
         // instruction is placed before the signing instruction, however it is
         // placed after. Because of this, we need to take into account the CFI
         // instruction is one instruction call later than expected, and reduce



More information about the cfe-commits mailing list