[llvm] [BOLT][AArch64] Handle OpNegateRAState to enable optimizing binaries with pac-ret hardening (PR #120064)

Gergely Bálint via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 1 06:16:38 PDT 2025


https://github.com/bgergely0 updated https://github.com/llvm/llvm-project/pull/120064

>From 9d8186c1fe6edad92420e6a046875c6ac0c41eb5 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Tue, 19 Nov 2024 09:43:25 +0100
Subject: [PATCH 01/20] [BOLT] Recognize paciasp and autiasp instructions

---
 bolt/include/bolt/Core/MCPlusBuilder.h           | 7 +++++++
 bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 6 ++++++
 2 files changed, 13 insertions(+)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f902a8c43cd1d..0c25ba52ce05e 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -815,6 +815,13 @@ class MCPlusBuilder {
     llvm_unreachable("not implemented");
     return false;
   }
+  virtual bool isPAuth(MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+  }
+
+  virtual bool isPSign(MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+  }
 
   virtual bool isCleanRegXOR(const MCInst &Inst) const {
     llvm_unreachable("not implemented");
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 973261765f951..2266733f956ee 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -894,6 +894,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
     return false;
   }
+  bool isPAuth(MCInst &Inst) const override {
+    return Inst.getOpcode() == AArch64::AUTIASP;
+  }
+  bool isPSign(MCInst &Inst) const override {
+    return Inst.getOpcode() == AArch64::PACIASP;
+  }
 
   bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From,
                       MCPhysReg &To) const override {

>From c22e4d9ad413fcb1997546aee4b9096ca113459a Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 16 Dec 2024 10:45:41 +0100
Subject: [PATCH 02/20] [BOLT] Support for OpNegateRAState

- save OpNegateRAState, RememberState and RestoreState locations
  when parsing input
- determine the RA states from these before other optimizations
  in MarkRAStates Pass
- after optimizations, we can insert OpNegateRAState at state
  boundaries and other needed locations (e.g. split functions)
  in InsertNegateRAStatePass
---
 bolt/include/bolt/Core/BinaryFunction.h       |  45 ++++++
 bolt/include/bolt/Core/MCPlus.h               |   9 +-
 bolt/include/bolt/Core/MCPlusBuilder.h        |  59 ++++++++
 .../bolt/Passes/InsertNegateRAStatePass.h     |  44 ++++++
 bolt/include/bolt/Passes/MarkRAStates.h       |  33 ++++
 bolt/lib/Core/BinaryBasicBlock.cpp            |   6 +-
 bolt/lib/Core/BinaryFunction.cpp              |   1 +
 bolt/lib/Core/Exceptions.cpp                  |  24 ++-
 bolt/lib/Core/MCPlusBuilder.cpp               |  68 +++++++++
 bolt/lib/Passes/CMakeLists.txt                |   2 +
 bolt/lib/Passes/InsertNegateRAStatePass.cpp   | 142 ++++++++++++++++++
 bolt/lib/Passes/MarkRAStates.cpp              | 133 ++++++++++++++++
 bolt/lib/Rewrite/BinaryPassManager.cpp        |   7 +
 13 files changed, 567 insertions(+), 6 deletions(-)
 create mode 100644 bolt/include/bolt/Passes/InsertNegateRAStatePass.h
 create mode 100644 bolt/include/bolt/Passes/MarkRAStates.h
 create mode 100644 bolt/lib/Passes/InsertNegateRAStatePass.cpp
 create mode 100644 bolt/lib/Passes/MarkRAStates.cpp

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index ae580520b9110..ebe12929c0973 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -1646,6 +1646,51 @@ class BinaryFunction {
 
   void setHasInferredProfile(bool Inferred) { HasInferredProfile = Inferred; }
 
+  /// Find corrected offset the same way addCFIInstruction does it to skip NOPs.
+  std::optional<uint64_t> getCorrectedCFIOffset(uint64_t Offset) {
+    assert(!Instructions.empty());
+    auto I = Instructions.lower_bound(Offset);
+    if (Offset == getSize()) {
+      assert(I == Instructions.end() && "unexpected iterator value");
+      // Sometimes compiler issues restore_state after all instructions
+      // in the function (even after nop).
+      --I;
+      Offset = I->first;
+    }
+    assert(I->first == Offset && "CFI pointing to unknown instruction");
+    if (I == Instructions.begin())
+      return {};
+
+    --I;
+    while (I != Instructions.begin() && BC.MIB->isNoop(I->second)) {
+      Offset = I->first;
+      --I;
+    }
+    return Offset;
+  }
+
+  void setInstModifiesRAState(uint8_t CFIOpcode, uint64_t Offset) {
+    std::optional<uint64_t> CorrectedOffset = getCorrectedCFIOffset(Offset);
+    if (CorrectedOffset) {
+      auto I = Instructions.lower_bound(*CorrectedOffset);
+      I--;
+
+      switch (CFIOpcode) {
+      case dwarf::DW_CFA_AARCH64_negate_ra_state:
+        BC.MIB->setNegateRAState(I->second);
+        break;
+      case dwarf::DW_CFA_remember_state:
+        BC.MIB->setRememberState(I->second);
+        break;
+      case dwarf::DW_CFA_restore_state:
+        BC.MIB->setRestoreState(I->second);
+        break;
+      default:
+        assert(0 && "CFI Opcode not covered by function");
+      }
+    }
+  }
+
   void addCFIInstruction(uint64_t Offset, MCCFIInstruction &&Inst) {
     assert(!Instructions.empty());
 
diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h
index 601d709712864..a95bba36c5a6e 100644
--- a/bolt/include/bolt/Core/MCPlus.h
+++ b/bolt/include/bolt/Core/MCPlus.h
@@ -72,7 +72,14 @@ class MCAnnotation {
     kLabel,               /// MCSymbol pointing to this instruction.
     kSize,                /// Size of the instruction.
     kDynamicBranch,       /// Jit instruction patched at runtime.
-    kGeneric              /// First generic annotation.
+    kSigning,             /// Inst is a signing instruction (paciasp, etc.).
+    kSigned,              /// Inst is in a range where RA is signed.
+    kAuthenticating,      /// Authenticating inst (e.g. autiasp).
+    kUnsigned,            /// Inst is in a range where RA is unsigned.
+    kRememberState,       /// Inst has rememberState CFI.
+    kRestoreState,        /// Inst has restoreState CFI.
+    kNegateState,         /// Inst has OpNegateRAState CFI.
+    kGeneric,             /// First generic annotation.
   };
 
   virtual void print(raw_ostream &OS) const = 0;
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 0c25ba52ce05e..05ae63cc63cb9 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -68,6 +68,20 @@ class MCPlusBuilder {
 public:
   using AllocatorIdTy = uint16_t;
 
+  std::optional<int64_t> getAnnotationAtOpIndex(const MCInst &Inst,
+                                                unsigned OpIndex) const {
+    std::optional<unsigned> FirstAnnotationOp = getFirstAnnotationOpIndex(Inst);
+    if (!FirstAnnotationOp)
+      return std::nullopt;
+
+    if (*FirstAnnotationOp > OpIndex || Inst.getNumOperands() < OpIndex)
+      return std::nullopt;
+
+    auto Op = Inst.begin() + OpIndex;
+    const int64_t ImmValue = Op->getImm();
+    return extractAnnotationIndex(ImmValue);
+  }
+
 private:
   /// A struct that represents a single annotation allocator
   struct AnnotationAllocator {
@@ -1296,6 +1310,51 @@ class MCPlusBuilder {
   /// Return true if the instruction is a tail call.
   bool isTailCall(const MCInst &Inst) const;
 
+  /// Stores NegateRAState annotation on \p Inst.
+  void setNegateRAState(MCInst &Inst) const;
+
+  /// Return true if \p Inst has NegateRAState annotation.
+  bool hasNegateRAState(const MCInst &Inst) const;
+
+  /// Sets RememberState annotation on \p Inst.
+  void setRememberState(MCInst &Inst) const;
+
+  /// Return true if \p Inst has RememberState annotation.
+  bool hasRememberState(const MCInst &Inst) const;
+
+  /// Stores RestoreState annotation on \p Inst.
+  void setRestoreState(MCInst &Inst) const;
+
+  /// Return true if \p Inst has RestoreState annotation.
+  bool hasRestoreState(const MCInst &Inst) const;
+
+  /// Stores RA Signed annotation on \p Inst.
+  void setRASigned(MCInst &Inst) const;
+
+  /// Return true if \p Inst has Signed RA annotation.
+  bool isRASigned(const MCInst &Inst) const;
+
+  /// Stores RA Signing annotation on \p Inst.
+  void setRASigning(MCInst &Inst) const;
+
+  /// Return true if \p Inst has Signing RA annotation.
+  bool isRASigning(const MCInst &Inst) const;
+
+  /// Stores Authenticating annotation on \p Inst.
+  void setAuthenticating(MCInst &Inst) const;
+
+  /// Return true if \p Inst has Authenticating annotation.
+  bool isAuthenticating(const MCInst &Inst) const;
+
+  /// Stores RA Unsigned annotation on \p Inst.
+  void setRAUnsigned(MCInst &Inst) const;
+
+  /// Return true if \p Inst has Unsigned RA annotation.
+  bool isRAUnsigned(const MCInst &Inst) const;
+
+  /// Return true if \p Inst doesn't have any annotation related to RA state.
+  bool isRAStateUnknown(const MCInst &Inst) const;
+
   /// Return true if the instruction is a call with an exception handling info.
   virtual bool isInvoke(const MCInst &Inst) const {
     return isCall(Inst) && getEHInfo(Inst);
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
new file mode 100644
index 0000000000000..e62006baa2eff
--- /dev/null
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -0,0 +1,44 @@
+//===- bolt/Passes/InsertNegateRAStatePass.h ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the InsertNegateRAState pass.
+//
+//===----------------------------------------------------------------------===//
+#ifndef BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS
+#define BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS
+
+#include "bolt/Passes/BinaryPasses.h"
+#include <stack>
+
+namespace llvm {
+namespace bolt {
+
+class InsertNegateRAState : public BinaryFunctionPass {
+public:
+  explicit InsertNegateRAState() : BinaryFunctionPass(false) {}
+
+  const char *getName() const override { return "insert-negate-ra-state-pass"; }
+
+  /// Pass entry point
+  Error runOnFunctions(BinaryContext &BC) override;
+  void runOnFunction(BinaryFunction &BF);
+
+private:
+  /// Loops over all instructions and adds OpNegateRAState CFI
+  /// after any pointer signing or authenticating instructions.
+  /// Returns true, if any OpNegateRAState CFIs were added.
+  bool addNegateRAStateAfterPacOrAuth(BinaryFunction &BF);
+  /// Because states are tracked as MCAnnotations on individual instructions,
+  /// newly inserted instructions do not have a state associated with them.
+  /// New states are "inherited" from the last known state.
+  void fixUnknownStates(BinaryFunction &BF);
+};
+
+} // namespace bolt
+} // namespace llvm
+#endif
diff --git a/bolt/include/bolt/Passes/MarkRAStates.h b/bolt/include/bolt/Passes/MarkRAStates.h
new file mode 100644
index 0000000000000..e7a49f813b6a7
--- /dev/null
+++ b/bolt/include/bolt/Passes/MarkRAStates.h
@@ -0,0 +1,33 @@
+//===- bolt/Passes/MarkRAStates.h -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MarkRAStates class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef BOLT_PASSES_MARK_RA_STATES
+#define BOLT_PASSES_MARK_RA_STATES
+
+#include "bolt/Passes/BinaryPasses.h"
+
+namespace llvm {
+namespace bolt {
+
+class MarkRAStates : public BinaryFunctionPass {
+public:
+  explicit MarkRAStates() : BinaryFunctionPass(false) {}
+
+  const char *getName() const override { return "mark-ra-states"; }
+
+  /// Pass entry point
+  Error runOnFunctions(BinaryContext &BC) override;
+  void runOnFunction(BinaryFunction &BF);
+};
+
+} // namespace bolt
+} // namespace llvm
+#endif
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index 311d5c15b8dca..22126eed67a6c 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -201,7 +201,11 @@ int32_t BinaryBasicBlock::getCFIStateAtInstr(const MCInst *Instr) const {
       InstrSeen = (&Inst == Instr);
       continue;
     }
-    if (Function->getBinaryContext().MIB->isCFI(Inst)) {
+    // Ignoring OpNegateRAState CFIs here, as they don't have a "State"
+    // number associated with them.
+    if (Function->getBinaryContext().MIB->isCFI(Inst) &&
+        (Function->getCFIFor(Inst)->getOperation() !=
+         MCCFIInstruction::OpNegateRAState)) {
       LastCFI = &Inst;
       break;
     }
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..7a044104f3b4e 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -2804,6 +2804,7 @@ struct CFISnapshot {
   void advanceTo(int32_t State) {
     for (int32_t I = CurState, E = State; I != E; ++I) {
       const MCCFIInstruction &Instr = FDE[I];
+      assert(Instr.getOperation() != MCCFIInstruction::OpNegateRAState);
       if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
         update(Instr, I);
         continue;
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 0b2e63b8ca6a7..63b7ad43b1dec 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -568,10 +568,21 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
     case DW_CFA_remember_state:
       Function.addCFIInstruction(
           Offset, MCCFIInstruction::createRememberState(nullptr));
+
+      if (Function.getBinaryContext().isAArch64())
+        // Support for pointer authentication:
+        // We need to annotate instructions that modify the RA State, to work
+        // out the state of each instruction in MarkRAStates Pass.
+        Function.setInstModifiesRAState(DW_CFA_remember_state, Offset);
       break;
     case DW_CFA_restore_state:
       Function.addCFIInstruction(Offset,
                                  MCCFIInstruction::createRestoreState(nullptr));
+      if (Function.getBinaryContext().isAArch64())
+        // Support for pointer authentication:
+        // We need to annotate instructions that modify the RA State, to work
+        // out the state of each instruction in MarkRAStates Pass.
+        Function.setInstModifiesRAState(DW_CFA_restore_state, Offset);
       break;
     case DW_CFA_def_cfa:
       Function.addCFIInstruction(
@@ -629,11 +640,16 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
         BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
       return false;
     case DW_CFA_GNU_window_save:
-      // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
-      // id but mean different things. The latter is used in AArch64.
+      // DW_CFA_GNU_window_save and DW_CFA_AARCH64_negate_ra_state just use the
+      // same id but mean different things. The latter is used in AArch64.
       if (Function.getBinaryContext().isAArch64()) {
-        Function.addCFIInstruction(
-            Offset, MCCFIInstruction::createNegateRAState(nullptr));
+        // The locations where OpNegateRAState CFIs are needed depend on
+        // the order of BasicBlocks, which changes during optimizations.
+        // Instead of adding OpNegateRAState CFIs, an annotation is added to
+        // the instruction to mark that it modifies the RA State. The actual
+        // state of each instruction is worked out in MarkRAStates based on
+        // these annotations.
+        Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state, Offset);
         break;
       }
       if (opts::Verbosity >= 1)
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index fa8f4d1df308b..aa2a784442a5f 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -149,6 +149,74 @@ bool MCPlusBuilder::isTailCall(const MCInst &Inst) const {
   return false;
 }
 
+void MCPlusBuilder::setNegateRAState(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kNegateState));
+  setAnnotationOpValue(Inst, MCAnnotation::kNegateState, true);
+}
+
+bool MCPlusBuilder::hasNegateRAState(const MCInst &Inst) const {
+  return hasAnnotation(Inst, MCAnnotation::kNegateState);
+}
+
+void MCPlusBuilder::setRememberState(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kRememberState));
+  setAnnotationOpValue(Inst, MCAnnotation::kRememberState, true);
+}
+
+bool MCPlusBuilder::hasRememberState(const MCInst &Inst) const {
+  return hasAnnotation(Inst, MCAnnotation::kRememberState);
+}
+
+void MCPlusBuilder::setRestoreState(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kRestoreState));
+  setAnnotationOpValue(Inst, MCAnnotation::kRestoreState, true);
+}
+
+bool MCPlusBuilder::hasRestoreState(const MCInst &Inst) const {
+  return hasAnnotation(Inst, MCAnnotation::kRestoreState);
+}
+
+void MCPlusBuilder::setRASigned(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kSigned));
+  setAnnotationOpValue(Inst, MCAnnotation::kSigned, true);
+}
+
+bool MCPlusBuilder::isRASigned(const MCInst &Inst) const {
+  return hasAnnotation(Inst, MCAnnotation::kSigned);
+}
+
+void MCPlusBuilder::setRASigning(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kSigning));
+  setAnnotationOpValue(Inst, MCAnnotation::kSigning, true);
+}
+
+bool MCPlusBuilder::isRASigning(const MCInst &Inst) const {
+  return hasAnnotation(Inst, MCAnnotation::kSigning);
+}
+
+void MCPlusBuilder::setAuthenticating(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kAuthenticating));
+  setAnnotationOpValue(Inst, MCAnnotation::kAuthenticating, true);
+}
+
+bool MCPlusBuilder::isAuthenticating(const MCInst &Inst) const {
+  return hasAnnotation(Inst, MCAnnotation::kAuthenticating);
+}
+
+void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kUnsigned));
+  setAnnotationOpValue(Inst, MCAnnotation::kUnsigned, true);
+}
+
+bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const {
+  return hasAnnotation(Inst, MCAnnotation::kUnsigned);
+}
+
+bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const {
+  return !(isRAUnsigned(Inst) || isRASigned(Inst) || isRASigning(Inst) ||
+           isAuthenticating(Inst));
+}
+
 std::optional<MCLandingPad> MCPlusBuilder::getEHInfo(const MCInst &Inst) const {
   if (!isCall(Inst))
     return std::nullopt;
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index 77d2bb9c2bcb5..d7519518f186f 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -17,12 +17,14 @@ add_llvm_library(LLVMBOLTPasses
   IdenticalCodeFolding.cpp
   IndirectCallPromotion.cpp
   Inliner.cpp
+  InsertNegateRAStatePass.cpp
   Instrumentation.cpp
   JTFootprintReduction.cpp
   LongJmp.cpp
   LoopInversionPass.cpp
   LivenessAnalysis.cpp
   MCF.cpp
+  MarkRAStates.cpp
   PatchEntries.cpp
   PAuthGadgetScanner.cpp
   PettisAndHansen.cpp
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
new file mode 100644
index 0000000000000..098ebf8f953b4
--- /dev/null
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -0,0 +1,142 @@
+//===- bolt/Passes/InsertNegateRAStatePass.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the InsertNegateRAState pass. It inserts
+// OpNegateRAState CFIs at places where the states of two consecutive
+// instructions differ.
+//
+//===----------------------------------------------------------------------===//
+#include "bolt/Passes/InsertNegateRAStatePass.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+
+using namespace llvm;
+
+namespace llvm {
+namespace bolt {
+
+void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
+  BinaryContext &BC = BF.getBinaryContext();
+
+  if (BF.getState() == BinaryFunction::State::Empty)
+    return;
+
+  if (BF.getState() != BinaryFunction::State::CFG &&
+      BF.getState() != BinaryFunction::State::CFG_Finalized) {
+    BC.outs() << "BOLT-INFO: No CFG for " << BF.getPrintName()
+              << " in InsertNegateRAStatePass\n";
+    return;
+  }
+
+  // If none is inserted, the function doesn't need more work.
+  if (!addNegateRAStateAfterPacOrAuth(BF))
+    return;
+
+  fixUnknownStates(BF);
+
+  bool FirstIter = true;
+  MCInst PrevInst;
+  BinaryBasicBlock *PrevBB = nullptr;
+  auto *Begin = BF.getLayout().block_begin();
+  auto *End = BF.getLayout().block_end();
+  for (auto *BB = Begin; BB != End; BB++) {
+
+    // Support for function splitting:
+    // if two consecutive BBs are going to end up in different functions,
+    // we have to negate the RA State, so the new function starts with a Signed
+    // state.
+    if (PrevBB != nullptr &&
+        PrevBB->getFragmentNum() != (*BB)->getFragmentNum() &&
+        BC.MIB->isRASigned(*((*BB)->begin()))) {
+      BF.addCFIInstruction(*BB, (*BB)->begin(),
+                           MCCFIInstruction::createNegateRAState(nullptr));
+    }
+
+    for (auto It = (*BB)->begin(); It != (*BB)->end(); ++It) {
+
+      MCInst &Inst = *It;
+      if (BC.MIB->isCFI(Inst))
+        continue;
+
+      if (!FirstIter) {
+        if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) ||
+            (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) {
+
+          It = BF.addCFIInstruction(
+              *BB, It, MCCFIInstruction::createNegateRAState(nullptr));
+        }
+
+      } else {
+        FirstIter = false;
+      }
+      PrevInst = *It;
+    }
+    PrevBB = *BB;
+  }
+}
+
+bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) {
+  BinaryContext &BC = BF.getBinaryContext();
+  bool FoundAny = false;
+  for (BinaryBasicBlock &BB : BF) {
+    for (auto Iter = BB.begin(); Iter != BB.end(); ++Iter) {
+      MCInst &Inst = *Iter;
+      if (BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) {
+        Iter = BF.addCFIInstruction(
+            &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr));
+        FoundAny = true;
+      }
+    }
+  }
+  return FoundAny;
+}
+
+void InsertNegateRAState::fixUnknownStates(BinaryFunction &BF) {
+  BinaryContext &BC = BF.getBinaryContext();
+  bool FirstIter = true;
+  MCInst PrevInst;
+  for (BinaryBasicBlock &BB : BF) {
+    for (auto It = BB.begin(); It != BB.end(); ++It) {
+
+      MCInst &Inst = *It;
+      if (BC.MIB->isCFI(Inst))
+        continue;
+
+      if (!FirstIter && BC.MIB->isRAStateUnknown(Inst)) {
+        if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isRASigning(PrevInst)) {
+          BC.MIB->setRASigned(Inst);
+        } else if (BC.MIB->isRAUnsigned(PrevInst) ||
+                   BC.MIB->isAuthenticating(PrevInst)) {
+          BC.MIB->setRAUnsigned(Inst);
+        }
+      } else {
+        FirstIter = false;
+      }
+      PrevInst = Inst;
+    }
+  }
+}
+
+Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) {
+  ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
+    runOnFunction(BF);
+  };
+
+  ParallelUtilities::runOnEachFunction(
+      BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr,
+      "InsertNegateRAStatePass");
+
+  return Error::success();
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
new file mode 100644
index 0000000000000..adccf2090c36f
--- /dev/null
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -0,0 +1,133 @@
+//===- bolt/Passes/MarkRAStates.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MarkRAStates class.
+// Three CFIs have an influence on the RA State of an instruction:
+// - NegateRAState flips the RA State,
+// - RememberState pushes the RA State to a stack,
+// - RestoreState pops the RA State from the stack.
+// These are saved as MCAnnotations on instructions they refer to at CFI
+// reading (in CFIReaderWriter::fillCFIInfoFor). In this pass, we can work out
+// the RA State of each instruction, and save it as new MCAnnotations. The new
+// annotations are Signing, Signed, Authenticating and Unsigned. After
+// optimizations, .cfi_negate_ra_state CFIs are added to the places where the
+// state changes in InsertNegateRAStatePass.
+//
+//===----------------------------------------------------------------------===//
+#include "bolt/Passes/MarkRAStates.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+
+#include <iostream>
+#include <optional>
+#include <stack>
+
+using namespace llvm;
+
+namespace llvm {
+namespace bolt {
+
+void MarkRAStates::runOnFunction(BinaryFunction &BF) {
+
+  if (BF.isIgnored())
+    return;
+
+  BinaryContext &BC = BF.getBinaryContext();
+
+  for (BinaryBasicBlock &BB : BF) {
+    for (auto It = BB.begin(); It != BB.end(); ++It) {
+      MCInst &Inst = *It;
+      if ((BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) &&
+          !BC.MIB->hasNegateRAState(Inst)) {
+        // No .cfi_negate_ra_state attached to a signing or authenticating
+        // instruction means that this function contains handwritten assembly,
+        // which might not respect Clang's conventions (e.g. tail calls are
+        // always authenticated, so functions always start with an unsigned
+        // RA state when working with compiler-generated code).
+        BF.setIgnored();
+        BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+                  << BF.getPrintName() << "\n";
+        return;
+      }
+    }
+  }
+
+  bool RAState = false;
+  std::stack<bool> RAStateStack;
+
+  for (BinaryBasicBlock &BB : BF) {
+    for (auto It = BB.begin(); It != BB.end(); ++It) {
+
+      MCInst &Inst = *It;
+      if (BC.MIB->isCFI(Inst))
+        continue;
+
+      if (BC.MIB->isPSign(Inst)) {
+        if (RAState) {
+          // RA signing instructions should only follow unsigned RA state.
+          BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+                    << BF.getPrintName() << "\n";
+          BF.setIgnored();
+          return;
+        }
+        BC.MIB->setRASigning(Inst);
+      } else if (BC.MIB->isPAuth(Inst)) {
+        if (!RAState) {
+          // RA authenticating instructions should only follow signed RA state.
+          BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+                    << BF.getPrintName() << "\n";
+          BF.setIgnored();
+          return;
+        }
+        BC.MIB->setAuthenticating(Inst);
+      } else if (RAState) {
+        BC.MIB->setRASigned(Inst);
+      } else {
+        BC.MIB->setRAUnsigned(Inst);
+      }
+
+      // Updating RAState. All updates are valid from the next instruction.
+      // Because the same instruction can have remember and restore, the order
+      // here is relevant. This is the reason to loop over Annotations instead
+      // of just checking each in a predefined order.
+      for (unsigned int Idx = 0; Idx < Inst.getNumOperands(); Idx++) {
+        std::optional<int64_t> Annotation =
+            BC.MIB->getAnnotationAtOpIndex(Inst, Idx);
+        if (!Annotation)
+          continue;
+        if (Annotation == MCPlus::MCAnnotation::kNegateState)
+          RAState = !RAState;
+        else if (Annotation == MCPlus::MCAnnotation::kRememberState)
+          RAStateStack.push(RAState);
+        else if (Annotation == MCPlus::MCAnnotation::kRestoreState) {
+          RAState = RAStateStack.top();
+          RAStateStack.pop();
+        }
+      }
+    }
+  }
+}
+
+Error MarkRAStates::runOnFunctions(BinaryContext &BC) {
+  ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
+    runOnFunction(BF);
+  };
+
+  ParallelUtilities::runOnEachFunction(
+      BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr,
+      "MarkRAStates");
+
+  return Error::success();
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 996d2e972599d..8b412854dc701 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -19,11 +19,13 @@
 #include "bolt/Passes/IdenticalCodeFolding.h"
 #include "bolt/Passes/IndirectCallPromotion.h"
 #include "bolt/Passes/Inliner.h"
+#include "bolt/Passes/InsertNegateRAStatePass.h"
 #include "bolt/Passes/Instrumentation.h"
 #include "bolt/Passes/JTFootprintReduction.h"
 #include "bolt/Passes/LongJmp.h"
 #include "bolt/Passes/LoopInversionPass.h"
 #include "bolt/Passes/MCF.h"
+#include "bolt/Passes/MarkRAStates.h"
 #include "bolt/Passes/PLTCall.h"
 #include "bolt/Passes/PatchEntries.h"
 #include "bolt/Passes/ProfileQualityStats.h"
@@ -350,6 +352,9 @@ Error BinaryFunctionPassManager::runPasses() {
 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
   BinaryFunctionPassManager Manager(BC);
 
+  if (BC.isAArch64())
+    Manager.registerPass(std::make_unique<MarkRAStates>());
+
   Manager.registerPass(
       std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts));
 
@@ -509,6 +514,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
     // targets. No extra instructions after this pass, otherwise we may have
     // relocations out of range and crash during linking.
     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
+
+    Manager.registerPass(std::make_unique<InsertNegateRAState>());
   }
 
   // This pass should always run last.*

>From 2f5a9240ffd2eeb462c1a589fb8d07aca5105170 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 7 Apr 2025 18:19:20 +0200
Subject: [PATCH 03/20] [BOLT][AArch64] Fix which PSign and PAuth variants are
 used (#120064)

- only the ones operating on LR should be marked
    with .cfi_negate_ra_state
- added support for fused PtrAuth and Ret instructions,
    e.g. RETAA.
---
 bolt/include/bolt/Core/MCPlusBuilder.h        | 22 ++++++++++-----
 .../bolt/Passes/InsertNegateRAStatePass.h     |  4 ++-
 bolt/lib/Passes/InsertNegateRAStatePass.cpp   | 13 ++++++---
 bolt/lib/Passes/MarkRAStates.cpp              |  7 +++--
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 28 +++++++++++++++----
 5 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 05ae63cc63cb9..6d41eaf45b03e 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -610,6 +610,21 @@ class MCPlusBuilder {
     return std::nullopt;
   }
 
+  virtual bool isPSignOnLR(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
+  virtual bool isPAuthOnLR(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
+  virtual bool isPAuthAndRet(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   /// Returns the register used as a return address. Returns std::nullopt if
   /// not applicable, such as reading the return address from a system register
   /// or from the stack.
@@ -829,13 +844,6 @@ class MCPlusBuilder {
     llvm_unreachable("not implemented");
     return false;
   }
-  virtual bool isPAuth(MCInst &Inst) const {
-    llvm_unreachable("not implemented");
-  }
-
-  virtual bool isPSign(MCInst &Inst) const {
-    llvm_unreachable("not implemented");
-  }
 
   virtual bool isCleanRegXOR(const MCInst &Inst) const {
     llvm_unreachable("not implemented");
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index e62006baa2eff..ce73b5a152d12 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -30,7 +30,9 @@ class InsertNegateRAState : public BinaryFunctionPass {
 
 private:
   /// Loops over all instructions and adds OpNegateRAState CFI
-  /// after any pointer signing or authenticating instructions.
+  /// after any pointer signing or authenticating instructions,
+  /// which operate on the LR, except fused ptrauth + ret instructions
+  /// (such as RETAA).
   /// Returns true, if any OpNegateRAState CFIs were added.
   bool addNegateRAStateAfterPacOrAuth(BinaryFunction &BF);
   /// Because states are tracked as MCAnnotations on individual instructions,
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 098ebf8f953b4..05d4d58c7e4cf 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -46,14 +46,16 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
   bool FirstIter = true;
   MCInst PrevInst;
   BinaryBasicBlock *PrevBB = nullptr;
+  // We need to iterate on BBs in the Layout order
+  // not in the order they are stored in the BF class.
   auto *Begin = BF.getLayout().block_begin();
   auto *End = BF.getLayout().block_end();
   for (auto *BB = Begin; BB != End; BB++) {
 
     // Support for function splitting:
-    // if two consecutive BBs are going to end up in different functions,
-    // we have to negate the RA State, so the new function starts with a Signed
-    // state.
+    // if two consecutive BBs with Signed state are going to end up in different
+    // functions, we have to add a OpNegateRAState to the beginning of the newly
+    // split function, so it starts with a Signed state.
     if (PrevBB != nullptr &&
         PrevBB->getFragmentNum() != (*BB)->getFragmentNum() &&
         BC.MIB->isRASigned(*((*BB)->begin()))) {
@@ -68,6 +70,8 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
         continue;
 
       if (!FirstIter) {
+        // Consecutive instructions with different RAState means we need to add
+        // a OpNegateRAState.
         if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) ||
             (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) {
 
@@ -90,7 +94,8 @@ bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) {
   for (BinaryBasicBlock &BB : BF) {
     for (auto Iter = BB.begin(); Iter != BB.end(); ++Iter) {
       MCInst &Inst = *Iter;
-      if (BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) {
+      if (BC.MIB->isPSignOnLR(Inst) ||
+          (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) {
         Iter = BF.addCFIInstruction(
             &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr));
         FoundAny = true;
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index adccf2090c36f..8121fffb93c9f 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -46,7 +46,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
   for (BinaryBasicBlock &BB : BF) {
     for (auto It = BB.begin(); It != BB.end(); ++It) {
       MCInst &Inst = *It;
-      if ((BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) &&
+      if ((BC.MIB->isPSignOnLR(Inst) ||
+           (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) &&
           !BC.MIB->hasNegateRAState(Inst)) {
         // no .cfi_negate_ra_state attached to signing or authenticating instr
         // means, that this is a function with handwritten assembly, which might
@@ -71,7 +72,7 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
       if (BC.MIB->isCFI(Inst))
         continue;
 
-      if (BC.MIB->isPSign(Inst)) {
+      if (BC.MIB->isPSignOnLR(Inst)) {
         if (RAState) {
           // RA signing instructions should only follow unsigned RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
@@ -80,7 +81,7 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
           return;
         }
         BC.MIB->setRASigning(Inst);
-      } else if (BC.MIB->isPAuth(Inst)) {
+      } else if (BC.MIB->isPAuthOnLR(Inst)) {
         if (!RAState) {
           // RA authenticating instructions should only follow signed RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 2266733f956ee..e0061bd12857c 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -231,6 +231,28 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
   }
 
+  bool isPSignOnLR(const MCInst &Inst) const override {
+    std::optional<MCPhysReg> SignReg = getSignedReg(Inst);
+    return SignReg && *SignReg == AArch64::LR;
+  }
+
+  bool isPAuthOnLR(const MCInst &Inst) const override {
+    // LDR(A|B) should not be covered.
+    bool IsChecked;
+    std::optional<MCPhysReg> AuthReg =
+        getWrittenAuthenticatedReg(Inst, IsChecked);
+    return !IsChecked && AuthReg && *AuthReg == AArch64::LR;
+  }
+
+  bool isPAuthAndRet(const MCInst &Inst) const override {
+    return Inst.getOpcode() == AArch64::RETAA ||
+           Inst.getOpcode() == AArch64::RETAB ||
+           Inst.getOpcode() == AArch64::RETAASPPCi ||
+           Inst.getOpcode() == AArch64::RETABSPPCi ||
+           Inst.getOpcode() == AArch64::RETAASPPCr ||
+           Inst.getOpcode() == AArch64::RETABSPPCr;
+  }
+
   std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const override {
     switch (Inst.getOpcode()) {
     case AArch64::PACIA:
@@ -894,12 +916,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
     return false;
   }
-  bool isPAuth(MCInst &Inst) const override {
-    return Inst.getOpcode() == AArch64::AUTIASP;
-  }
-  bool isPSign(MCInst &Inst) const override {
-    return Inst.getOpcode() == AArch64::PACIASP;
-  }
 
   bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From,
                       MCPhysReg &To) const override {

>From 19ea611bdb90c22bd0451c96c7e60e4a55b4d9ff Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Fri, 4 Apr 2025 11:23:22 +0200
Subject: [PATCH 04/20] [BOLT] Add unit tests for negate_ra_state cfi handling

- also add match_dwarf.py, a tool used by the unit tests.
---
 bolt/lib/Core/BinaryFunction.cpp              |  24 +--
 bolt/test/AArch64/negate-ra-state-incorrect.s |  44 ++++++
 bolt/test/AArch64/negate-ra-state.s           |  42 ++++++
 bolt/test/lit.cfg.py                          |   7 +
 bolt/test/match_dwarf.py                      | 137 ++++++++++++++++++
 5 files changed, 233 insertions(+), 21 deletions(-)
 create mode 100644 bolt/test/AArch64/negate-ra-state-incorrect.s
 create mode 100644 bolt/test/AArch64/negate-ra-state.s
 create mode 100755 bolt/test/match_dwarf.py

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 7a044104f3b4e..c4811888a58f9 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -2782,14 +2782,8 @@ struct CFISnapshot {
     case MCCFIInstruction::OpLLVMDefAspaceCfa:
     case MCCFIInstruction::OpLabel:
     case MCCFIInstruction::OpValOffset:
-      llvm_unreachable("unsupported CFI opcode");
-      break;
     case MCCFIInstruction::OpNegateRAState:
-      if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) {
-        llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. "
-                         "as produced by '-mbranch-protection=pac-ret') are "
-                         "currently not supported by BOLT.");
-      }
+      llvm_unreachable("unsupported CFI opcode");
       break;
     case MCCFIInstruction::OpRememberState:
     case MCCFIInstruction::OpRestoreState:
@@ -2929,15 +2923,9 @@ struct CFISnapshotDiff : public CFISnapshot {
     case MCCFIInstruction::OpLLVMDefAspaceCfa:
     case MCCFIInstruction::OpLabel:
     case MCCFIInstruction::OpValOffset:
+    case MCCFIInstruction::OpNegateRAState:
       llvm_unreachable("unsupported CFI opcode");
       return false;
-    case MCCFIInstruction::OpNegateRAState:
-      if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) {
-        llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. "
-                         "as produced by '-mbranch-protection=pac-ret') are "
-                         "currently not supported by BOLT.");
-      }
-      break;
     case MCCFIInstruction::OpRememberState:
     case MCCFIInstruction::OpRestoreState:
     case MCCFIInstruction::OpGnuArgsSize:
@@ -3086,14 +3074,8 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
     case MCCFIInstruction::OpLLVMDefAspaceCfa:
     case MCCFIInstruction::OpLabel:
     case MCCFIInstruction::OpValOffset:
-      llvm_unreachable("unsupported CFI opcode");
-      break;
     case MCCFIInstruction::OpNegateRAState:
-      if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) {
-        llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. "
-                         "as produced by '-mbranch-protection=pac-ret') are "
-                         "currently not supported by BOLT.");
-      }
+      llvm_unreachable("unsupported CFI opcode");
       break;
     case MCCFIInstruction::OpGnuArgsSize:
       // do not affect CFI state
diff --git a/bolt/test/AArch64/negate-ra-state-incorrect.s b/bolt/test/AArch64/negate-ra-state-incorrect.s
new file mode 100644
index 0000000000000..c6b8b36939f4d
--- /dev/null
+++ b/bolt/test/AArch64/negate-ra-state-incorrect.s
@@ -0,0 +1,44 @@
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags  %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.exe.bolt | FileCheck %s
+
+# check that the output is listing foo as incorrect
+# CHECK: BOLT-INFO: inconsistent RAStates in function foo
+
+# check that foo got Ignored, so it's not in the new .text section
+# RUN: llvm-objdump %t.exe.bolt -d -j .text > %t.exe.dump
+# RUN: not grep "<foo>:" %t.exe.dump
+
+
+# How is this test incorrect?
+# There is an extra .cfi_negate_ra_state in foo.
+# Because of this, we will get to the autiasp (hint #29)
+# in a (seemingly) unsigned state. That is incorrect.
+  .text
+  .globl  foo
+  .p2align        2
+  .type   foo, at function
+foo:
+  .cfi_startproc
+  hint    #25
+  .cfi_negate_ra_state
+  sub     sp, sp, #16
+  stp     x29, x30, [sp, #16]             // 16-byte Folded Spill
+  .cfi_def_cfa_offset 16
+  str     w0, [sp, #12]
+  ldr     w8, [sp, #12]
+  .cfi_negate_ra_state
+  add     w0, w8, #1
+  ldp     x29, x30, [sp, #16]             // 16-byte Folded Reload
+  add     sp, sp, #16
+  hint    #29
+  .cfi_negate_ra_state
+  ret
+.Lfunc_end1:
+  .size   foo, .Lfunc_end1-foo
+  .cfi_endproc
+
+  .global _start
+  .type _start, %function
+_start:
+  b foo
diff --git a/bolt/test/AArch64/negate-ra-state.s b/bolt/test/AArch64/negate-ra-state.s
new file mode 100644
index 0000000000000..11c511a254c71
--- /dev/null
+++ b/bolt/test/AArch64/negate-ra-state.s
@@ -0,0 +1,42 @@
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags  %t.o -o %t.exe -Wl,-q
+
+# RUN: llvm-objdump %t.exe -d > %t.exe.dump
+# RUN: llvm-objdump --dwarf=frames %t.exe > %t.exe.dump-dwarf
+# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > %t.match-dwarf.txt
+
+# RUN: llvm-bolt %t.exe -o %t.exe.bolt
+
+# RUN: llvm-objdump %t.exe.bolt -d > %t.exe.bolt.dump
+# RUN: llvm-objdump --dwarf=frames %t.exe.bolt  > %t.exe.bolt.dump-dwarf
+# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > %t.bolt.match-dwarf.txt
+
+# RUN: diff %t.match-dwarf.txt %t.bolt.match-dwarf.txt
+
+	.text
+	.globl	foo
+	.p2align	2
+	.type	foo, at function
+foo:
+	.cfi_startproc
+	hint	#25
+	.cfi_negate_ra_state
+	sub	sp, sp, #16
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	.cfi_def_cfa_offset 16
+	str	w0, [sp, #12]
+	ldr	w8, [sp, #12]
+	add	w0, w8, #1
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #16
+	hint	#29
+	.cfi_negate_ra_state
+	ret
+.Lfunc_end1:
+	.size	foo, .Lfunc_end1-foo
+	.cfi_endproc
+
+	.global _start
+	.type _start, %function
+_start:
+	b foo
diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py
index 0d05229be2bf3..60617cd76f968 100644
--- a/bolt/test/lit.cfg.py
+++ b/bolt/test/lit.cfg.py
@@ -89,6 +89,7 @@
 config.substitutions.append(("%cxxflags", ""))
 
 link_fdata_cmd = os.path.join(config.test_source_root, "link_fdata.py")
+match_dwarf_cmd = os.path.join(config.test_source_root, "match_dwarf.py")
 
 tool_dirs = [config.llvm_tools_dir, config.test_source_root]
 
@@ -131,6 +132,12 @@
     ToolSubst("llvm-readobj", unresolved="fatal"),
     ToolSubst("llvm-dwp", unresolved="fatal"),
     ToolSubst("split-file", unresolved="fatal"),
+    ToolSubst(
+        "match-dwarf",
+        command=sys.executable,
+        unresolved="fatal",
+        extra_args=[match_dwarf_cmd],
+    ),
 ]
 llvm_config.add_tool_substitutions(tools, tool_dirs)
 
diff --git a/bolt/test/match_dwarf.py b/bolt/test/match_dwarf.py
new file mode 100755
index 0000000000000..3d3ab22042d5c
--- /dev/null
+++ b/bolt/test/match_dwarf.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+
+# This tool helps matching dwarf dumps
+# (= the output from running llvm-objdump --dwarf=frames),
+# by address to function names (which are parsed from a normal objdump).
+# The script is used for checking if .cfi_negate_ra_state CFIs
+# are generated by BOLT the same way they are generated by LLVM.
+# The script is called twice in unittests: once with the objdumps of
+# the BOLT input binary, and once with the output binary from BOLT.
+# We output the offsets of .cfi_negate_ra_state instructions from the
+# function's start address to see that BOLT can generate them to the same
+# locations.
+# Because we check the location, this is only useful for testing without
+# optimization flags, so `llvm-bolt input.exe -o output.exe`
+
+
+import argparse
+import subprocess
+import sys
+import re
+
+
+class NameDwarfPair(object):
+    def __init__(self, name, body):
+        self.name = name
+        self.body = body
+        self.finalized = False
+
+    def append(self, body_line):
+        # only store elements into the body until the first whitespace line is encountered.
+        if body_line.isspace():
+            self.finalized = True
+        if not self.finalized:
+            self.body += body_line
+
+    def print(self):
+        print(self.name)
+        print(self.body)
+
+    def parse_negate_offsets(self):
+        """
+        Create a list of locations/offsets of the negate_ra_state CFIs in the
+        dwarf entry. To find offsets for each, we match the DW_CFA_advance_loc
+        entries, and sum up their values.
+        """
+        negate_offsets = []
+        loc = 0
+        # TODO: make sure this is not printed in hex
+        re_advloc = r"DW_CFA_advance_loc: (\d+)"
+
+        for line in self.body.splitlines():
+            # if line matches advance_loc int
+            match = re.search(re_advloc, line)
+            if match:
+                loc += int(match.group(1))
+            if "DW_CFA_AARCH64_negate_ra_state" in line:
+                negate_offsets.append(loc)
+
+        self.negate_offsets = negate_offsets
+
+    def __eq__(self, other):
+        return self.name == other.name and self.negate_offsets == other.negate_offsets
+
+
+def extract_function_addresses(objdump):
+    """
+    Parse and return address-to-name dictionary from objdump file.
+    Function names in the objdump look like this:
+        000123abc <foo>:
+    We create a dict from the addr (000123abc), to the name (foo).
+    """
+    addr_name_dict = dict()
+    re_function = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$")
+    with open(objdump, "r") as f:
+        for line in f.readlines():
+            match = re_function.match(line)
+            if not match:
+                continue
+            m_addr = match.groups()[0]
+            m_name = match.groups()[1]
+            addr_name_dict[int(m_addr, 16)] = m_name
+
+    return addr_name_dict
+
+
+def match_dwarf_to_name(dwarfdump, addr_name_dict):
+    """
+    Parse dwarf dump, and match names to blocks using the dict from the objdump.
+    Return a list of NameDwarfPairs.
+    The matched lines look like this:
+    000123 000456 000789 FDE cie=000000  pc=0123abc...0456def
+    We do not have the function name for this, only the PC range it applies to.
+    We match the pc=0123abc (the start address), and find the matching name from
+    the addr_name_dict.
+    The resulting NameDwarfPair will hold the lines this header applied to, and
+    instead of the header with the addresses, it will just have the function name.
+    """
+    re_address_line = re.compile(r".*pc=([0-9a-fA-F]+)\.\.\.([0-9a-fA-F]+)")
+    with open(dwarfdump, "r") as dw:
+        functions = []
+        for line in dw.readlines():
+            match = re_address_line.match(line)
+            if not match:
+                if len(functions) > 0:
+                    functions[-1].append(line)
+                continue
+            pc_start_address = match.groups()[0]
+            name = addr_name_dict.get(int(pc_start_address, 16))
+            functions.append(NameDwarfPair(name, ""))
+
+        return functions
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("objdump", help="Objdump file")
+    parser.add_argument(
+        "dwarfdump", help="dwarf dump file created with 'llvm-objdump --dwarf=frames'"
+    )
+    parser.add_argument("function", help="Function to search CFIs in.")
+
+    args = parser.parse_args()
+
+    addr_name_dict = extract_function_addresses(args.objdump)
+    functions = match_dwarf_to_name(args.dwarfdump, addr_name_dict)
+
+    for f in functions:
+        if f.name == args.function:
+            f.parse_negate_offsets()
+            print(f.negate_offsets)
+            break
+    else:
+        print(f"{args.function} not found")
+        exit(-1)
+
+
+main()

>From 9b53bc343a799e379a806287b764fc6197523d9e Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Fri, 23 May 2025 14:11:47 +0200
Subject: [PATCH 05/20] [BOLT] Basic exception unwinding test

---
 bolt/test/runtime/AArch64/negate-ra-state.cpp | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 bolt/test/runtime/AArch64/negate-ra-state.cpp

diff --git a/bolt/test/runtime/AArch64/negate-ra-state.cpp b/bolt/test/runtime/AArch64/negate-ra-state.cpp
new file mode 100644
index 0000000000000..60b0b08950b58
--- /dev/null
+++ b/bolt/test/runtime/AArch64/negate-ra-state.cpp
@@ -0,0 +1,26 @@
+// REQUIRES: system-linux,bolt-runtime
+
+// RUN: %clangxx --target=aarch64-unknown-linux-gnu \
+// RUN: -mbranch-protection=pac-ret -Wl,-q %s -o %t.exe
+// RUN: llvm-bolt %t.exe -o %t.bolt.exe
+// RUN: %t.bolt.exe | FileCheck %s
+
+// CHECK: Exception caught: Exception from bar().
+
+#include <cstdio>
+#include <stdexcept>
+
+void bar() { throw std::runtime_error("Exception from bar()."); }
+
+void foo() {
+  try {
+    bar();
+  } catch (const std::exception &e) {
+    printf("Exception caught: %s\n", e.what());
+  }
+}
+
+int main() {
+  foo();
+  return 0;
+}

>From cf8684765c8d8b81f7a4553f2f12debede5f696f Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Tue, 29 Jul 2025 14:28:34 +0000
Subject: [PATCH 06/20] [BOLT] Add OpNegateRAState to printCFI

---
 bolt/lib/Core/BinaryContext.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 84f1853469709..c8f623c2374bc 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1865,6 +1865,9 @@ void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
   case MCCFIInstruction::OpGnuArgsSize:
     OS << "OpGnuArgsSize";
     break;
+  case MCCFIInstruction::OpNegateRAState:
+    OS << "OpNegateRAState";
+    break;
   default:
     OS << "Op#" << Operation;
     break;

>From f605baf0fd52184a57ebf41baefcfa7991b95ff8 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Tue, 29 Jul 2025 14:28:57 +0000
Subject: [PATCH 07/20] [BOLT] Improve function splitting at OpNegateRAState
 handling

- Previous version used the Layout API which should not be used.
- This version iterates on Fragments to have the same effect.
- NegateRAState is added to the first *non-empty* BB of the cold fragment.
---
 bolt/lib/Passes/InsertNegateRAStatePass.cpp | 44 +++++++++++----------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 05d4d58c7e4cf..0eb0e3aef00d4 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -43,27 +43,32 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
 
   fixUnknownStates(BF);
 
-  bool FirstIter = true;
-  MCInst PrevInst;
-  BinaryBasicBlock *PrevBB = nullptr;
-  // We need to iterate on BBs in the Layout order
-  // not in the order they are stored in the BF class.
-  auto *Begin = BF.getLayout().block_begin();
-  auto *End = BF.getLayout().block_end();
-  for (auto *BB = Begin; BB != End; BB++) {
-
-    // Support for function splitting:
-    // if two consecutive BBs with Signed state are going to end up in different
-    // functions, we have to add a OpNegateRAState to the beginning of the newly
-    // split function, so it starts with a Signed state.
-    if (PrevBB != nullptr &&
-        PrevBB->getFragmentNum() != (*BB)->getFragmentNum() &&
-        BC.MIB->isRASigned(*((*BB)->begin()))) {
-      BF.addCFIInstruction(*BB, (*BB)->begin(),
+  // Support for function splitting:
+  // if two consecutive BBs with Signed state are going to end up in different
+  // functions (so are held by different FunctionFragments), we have to add a
+  // OpNegateRAState to the beginning of the newly split function, so it starts
+  // with a Signed state.
+  for (FunctionFragment &FF : BF.getLayout().fragments()) {
+    // Find the first BB in the FF which has Instructions.
+    // BOLT can generate empty BBs at function splitting which are only used as
+    // target labels. We should add the negate-ra-state CFI to the first
+    // non-empty BB.
+    auto FirstNonEmpty =
+        std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) {
+          // getFirstNonPseudo returns BB.end() if it does not find any
+          // Instructions.
+          return BB->getFirstNonPseudo() != BB->end();
+        });
+    if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) {
+      BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(),
                            MCCFIInstruction::createNegateRAState(nullptr));
     }
+  }
 
-    for (auto It = (*BB)->begin(); It != (*BB)->end(); ++It) {
+  bool FirstIter = true;
+  MCInst PrevInst;
+  for (BinaryBasicBlock &BB : BF) {
+    for (auto It = BB.begin(); It != BB.end(); ++It) {
 
       MCInst &Inst = *It;
       if (BC.MIB->isCFI(Inst))
@@ -76,7 +81,7 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
             (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) {
 
           It = BF.addCFIInstruction(
-              *BB, It, MCCFIInstruction::createNegateRAState(nullptr));
+              &BB, It, MCCFIInstruction::createNegateRAState(nullptr));
         }
 
       } else {
@@ -84,7 +89,6 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
       }
       PrevInst = *It;
     }
-    PrevBB = *BB;
   }
 }
 

>From 12a04c734479b9eefa427ead5e6ec8ff2c892578 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Thu, 31 Jul 2025 08:42:06 +0000
Subject: [PATCH 08/20] [BOLT] Bugfix: CFIs can be placed before the first
 Instruction

- This caused a crash when trying to Annotate RAState-changing CFIs
    (RememberState, RestoreState, NegateRAState).
- The fix introduces an InitialRAState for each BinaryFunction.
- If we have a NegateRAState before the first Instr, we set InitialRAState to
  True.
- In MarkRAStates, we push the InitialRAState to the RAStateStack: as we
  may have omitted the RememberState at the function start, its RestoreState
  pair would try to pop an empty stack otherwise.
---
 bolt/include/bolt/Core/BinaryFunction.h |  5 +++++
 bolt/lib/Core/Exceptions.cpp            | 21 ++++++++++++++++-----
 bolt/lib/Passes/MarkRAStates.cpp        |  3 ++-
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index ebe12929c0973..c797f9e2ac71a 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -147,6 +147,9 @@ class BinaryFunction {
     PF_MEMEVENT = 4, /// Profile has mem events.
   };
 
+  void setInitialRAState(bool State) { InitialRAState = State; }
+  bool getInitialRAState() { return InitialRAState; }
+
   /// Struct for tracking exception handling ranges.
   struct CallSite {
     const MCSymbol *Start;
@@ -220,6 +223,8 @@ class BinaryFunction {
   /// Current state of the function.
   State CurrentState{State::Empty};
 
+  bool InitialRAState{false};
+
   /// A list of symbols associated with the function entry point.
   ///
   /// Multiple symbols would typically result from identical code-folding
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 63b7ad43b1dec..954c21e14ba17 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -569,20 +569,24 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
       Function.addCFIInstruction(
           Offset, MCCFIInstruction::createRememberState(nullptr));
 
-      if (Function.getBinaryContext().isAArch64())
+      if (Function.getBinaryContext().isAArch64()) {
         // Support for pointer authentication:
         // We need to annotate instructions that modify the RA State, to work
         // out the state of each instruction in MarkRAStates Pass.
-        Function.setInstModifiesRAState(DW_CFA_remember_state, Offset);
+        if (Offset != 0)
+          Function.setInstModifiesRAState(DW_CFA_remember_state, Offset);
+      }
       break;
     case DW_CFA_restore_state:
       Function.addCFIInstruction(Offset,
                                  MCCFIInstruction::createRestoreState(nullptr));
-      if (Function.getBinaryContext().isAArch64())
+      if (Function.getBinaryContext().isAArch64()) {
         // Support for pointer authentication:
         // We need to annotate instructions that modify the RA State, to work
         // out the state of each instruction in MarkRAStates Pass.
-        Function.setInstModifiesRAState(DW_CFA_restore_state, Offset);
+        if (Offset != 0)
+          Function.setInstModifiesRAState(DW_CFA_restore_state, Offset);
+      }
       break;
     case DW_CFA_def_cfa:
       Function.addCFIInstruction(
@@ -649,7 +653,14 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
         // is added to the instruction, to mark that the instruction modifies
         // the RA State. The actual state for instructions are worked out in
         // MarkRAStates based on these annotations.
-        Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state, Offset);
+        if (Offset != 0)
+          Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state,
+                                          Offset);
+        else
+          // We cannot Annotate an instruction at Offset == 0.
+          // Instead, we save the initial (Signed) state, and push it to
+          // MarkRAStates' RAStateStack.
+          Function.setInitialRAState(true);
         break;
       }
       if (opts::Verbosity >= 1)
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index 8121fffb93c9f..d7db5532002bf 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -62,8 +62,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
     }
   }
 
-  bool RAState = false;
+  bool RAState = BF.getInitialRAState();
   std::stack<bool> RAStateStack;
+  RAStateStack.push(RAState);
 
   for (BinaryBasicBlock &BB : BF) {
     for (auto It = BB.begin(); It != BB.end(); ++It) {

>From 8663454568b1475fe3d1ebbc221d03ace461de67 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Fri, 1 Aug 2025 08:21:17 +0000
Subject: [PATCH 09/20] [BOLT] Add function-splitting test

---
 .../runtime/AArch64/pacret-function-split.cpp | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 bolt/test/runtime/AArch64/pacret-function-split.cpp

diff --git a/bolt/test/runtime/AArch64/pacret-function-split.cpp b/bolt/test/runtime/AArch64/pacret-function-split.cpp
new file mode 100644
index 0000000000000..208fc5c115571
--- /dev/null
+++ b/bolt/test/runtime/AArch64/pacret-function-split.cpp
@@ -0,0 +1,42 @@
+/* This test checks that the negate-ra-state CFIs are properly emitted in case
+   of function splitting. The test checks two things:
+    - we split at the correct location: to test the feature,
+        we need to split *before* the bl __cxa_throw at PLT call is made,
+        so the unwinder has to unwind from the split (cold) part.
+
+    - the BOLTed binary runs, and returns the string from foo.
+
+# REQUIRES: system-linux,bolt-runtime
+
+# FDATA: 1 main #split# 1 _Z3foov 0 0 1
+
+# RUN: %clangxx --target=aarch64-unknown-linux-gnu \
+# RUN: -mbranch-protection=pac-ret %s -o %t.exe -Wl,-q
+# RUN: link_fdata %s %t.exe %t.fdata
+# RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --split-eh \
+# RUN: --split-strategy=profile2 --split-all-cold --print-split \
+# RUN: --print-only=_Z3foov --data=%t.fdata 2>&1 | FileCheck \
+# RUN: --check-prefix=BOLT-CHECK %s
+# RUN: %t.bolt | FileCheck %s  --check-prefix=RUN-CHECK
+
+# BOLT-CHECK-NOT: bl      __cxa_throw at PLT
+# BOLT-CHECK: -------   HOT-COLD SPLIT POINT   -------
+# BOLT-CHECK: bl      __cxa_throw at PLT
+
+# RUN-CHECK: Exception caught: Exception from foo().
+*/
+
+#include <cstdio>
+#include <stdexcept>
+
+void foo() { throw std::runtime_error("Exception from foo()."); }
+
+int main() {
+  try {
+    __asm__ __volatile__("split:");
+    foo();
+  } catch (const std::exception &e) {
+    printf("Exception caught: %s\n", e.what());
+  }
+  return 0;
+}

>From 3c05227fc6c68962e39269a2a5a0364c4e23c40f Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 4 Aug 2025 13:48:49 +0000
Subject: [PATCH 10/20] [BOLT] Improve warnings in MarkRAStates

- Previously, the same warning was printed in
  3 different cases. Changed warnings to be more specific.
---
 bolt/lib/Passes/MarkRAStates.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index d7db5532002bf..1b56cd83a2314 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -57,6 +57,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
         BF.setIgnored();
         BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                   << BF.getPrintName() << "\n";
+        BC.outs()
+            << "BOLT-INFO: ptr sign/auth inst without .cfi_negate_ra_state\n";
         return;
       }
     }
@@ -78,6 +80,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
           // RA signing instructions should only follow unsigned RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                     << BF.getPrintName() << "\n";
+          BC.outs() << "BOLT-INFO: ptr signing inst encountered in Signed RA "
+                       "state.\n";
           BF.setIgnored();
           return;
         }
@@ -87,6 +91,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
           // RA authenticating instructions should only follow signed RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                     << BF.getPrintName() << "\n";
+          BC.outs() << "BOLT-INFO: ptr authenticating inst encountered in "
+                       "Unsigned RA state.\n";
           BF.setIgnored();
           return;
         }

>From 5625e809a3b1ac896cf3fd0b2de3ad98b9b6cc33 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Tue, 5 Aug 2025 13:33:44 +0000
Subject: [PATCH 11/20] [BOLT] Improve negate-ra-state-incorrect test

- check all 3 possibilities that MarkRAStates warns about,
- simplify the asm functions in the test,
- use FileCheck instead of grep on the disassembly.
---
 bolt/test/AArch64/negate-ra-state-incorrect.s | 79 ++++++++++++++-----
 1 file changed, 59 insertions(+), 20 deletions(-)

diff --git a/bolt/test/AArch64/negate-ra-state-incorrect.s b/bolt/test/AArch64/negate-ra-state-incorrect.s
index c6b8b36939f4d..ad922d83c6d61 100644
--- a/bolt/test/AArch64/negate-ra-state-incorrect.s
+++ b/bolt/test/AArch64/negate-ra-state-incorrect.s
@@ -1,19 +1,31 @@
+# This test checks that MarkRAStates pass ignores functions with
+# malformed .cfi_negate_ra_state sequences in the input binary.
+
+# The cases checked are:
+#   - extra .cfi_negate_ra_state in Signed state: checked in foo,
+#   - extra .cfi_negate_ra_state in Unsigned state: checked in bar,
+#   - missing .cfi_negate_ra_state from PSign or PAuth instructions: checked in baz.
+
 # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
 # RUN: %clang %cflags  %t.o -o %t.exe -Wl,-q
-# RUN: llvm-bolt %t.exe -o %t.exe.bolt | FileCheck %s
+# RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads | FileCheck %s --check-prefix=CHECK-BOLT
+
+# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function foo
+# CHECK-BOLT-NEXT: BOLT-INFO: ptr authenticating inst encountered in Unsigned RA state.
 
-# check that the output is listing foo as incorrect
-# CHECK: BOLT-INFO: inconsistent RAStates in function foo
+# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function bar
+# CHECK-BOLT-NEXT: BOLT-INFO: ptr signing inst encountered in Signed RA state
 
-# check that foo got Ignored, so it's not in the new .text section
-# RUN: llvm-objdump %t.exe.bolt -d -j .text > %t.exe.dump
-# RUN: not grep "<foo>:" %t.exe.dump
+# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function baz
+# CHECK-BOLT-NEXT: BOLT-INFO: ptr sign/auth inst without .cfi_negate_ra_state
+
+# Check that the incorrect functions got ignored, so they are not in the new .text section
+# RUN: llvm-objdump %t.exe.bolt -d -j .text | FileCheck %s --check-prefix=CHECK-OBJDUMP
+# CHECK-OBJDUMP-NOT: <foo>:
+# CHECK-OBJDUMP-NOT: <bar>:
+# CHECK-OBJDUMP-NOT: <baz>:
 
 
-# How is this test incorrect?
-# There is an extra .cfi_negate_ra_state in foo.
-# Because of this, we will get to the autiasp (hint #29)
-# in a (seemingly) unsigned state. That is incorrect.
   .text
   .globl  foo
   .p2align        2
@@ -22,23 +34,50 @@ foo:
   .cfi_startproc
   hint    #25
   .cfi_negate_ra_state
-  sub     sp, sp, #16
-  stp     x29, x30, [sp, #16]             // 16-byte Folded Spill
-  .cfi_def_cfa_offset 16
-  str     w0, [sp, #12]
-  ldr     w8, [sp, #12]
+  mov x1, #0
+  .cfi_negate_ra_state        // Incorrect CFI in signed state
+  hint    #29
+  .cfi_negate_ra_state
+  ret
+  .cfi_endproc
+  .size   foo, .-foo
+
+  .text
+  .globl  bar
+  .p2align        2
+  .type   bar,@function
+bar:
+  .cfi_startproc
+  mov x1, #0
+  .cfi_negate_ra_state      // Incorrect CFI in unsigned state
+  hint    #25
   .cfi_negate_ra_state
-  add     w0, w8, #1
-  ldp     x29, x30, [sp, #16]             // 16-byte Folded Reload
-  add     sp, sp, #16
+  mov x1, #0
   hint    #29
   .cfi_negate_ra_state
   ret
-.Lfunc_end1:
-  .size   foo, .Lfunc_end1-foo
   .cfi_endproc
+  .size   bar, .-bar
+
+  .text
+  .globl  baz
+  .p2align        2
+  .type   baz,@function
+baz:
+  .cfi_startproc
+  mov x1, #0
+  hint    #25
+  .cfi_negate_ra_state
+  mov x1, #0
+  hint    #29
+                            // Missing .cfi_negate_ra_state
+  ret
+  .cfi_endproc
+  .size   baz, .-baz
 
   .global _start
   .type _start, %function
 _start:
   b foo
+  b bar
+  b baz

>From dd2395ca70dda9879b30fe99636f77079377c17f Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 6 Aug 2025 13:57:29 +0000
Subject: [PATCH 12/20] [BOLT] Do not run negate-ra-state rewriting passes on
 all functions

- if a BinaryFunction did not contain any .cfi_negate_ra_state,
  we can skip MarkRAStates and InsertNegateRAStatePass. This can happen
  with handwritten assembly, or if the binary was compiled in a way that
  drops DWARF tables.
---
 bolt/include/bolt/Core/BinaryFunction.h     | 6 ++++++
 bolt/lib/Core/Exceptions.cpp                | 1 +
 bolt/lib/Passes/InsertNegateRAStatePass.cpp | 8 +++++++-
 bolt/lib/Passes/MarkRAStates.cpp            | 8 +++++++-
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index c797f9e2ac71a..5335ec5a9a7bd 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -147,6 +147,8 @@ class BinaryFunction {
     PF_MEMEVENT = 4, /// Profile has mem events.
   };
 
+  void setContainedNegateRAState() { HadNegateRAState = true; }
+  bool containedNegateRAState() { return HadNegateRAState; }
   void setInitialRAState(bool State) { InitialRAState = State; }
   bool getInitialRAState() { return InitialRAState; }
 
@@ -223,6 +225,10 @@ class BinaryFunction {
   /// Current state of the function.
   State CurrentState{State::Empty};
 
+  /// Indicates if the Function contained .cfi-negate-ra-state. These are not
+  /// read from the binary. This boolean is used when deciding to run the
+  /// .cfi-negate-ra-state rewriting passes on a function or not.
+  bool HadNegateRAState{false};
   bool InitialRAState{false};
 
   /// A list of symbols associated with the function entry point.
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 954c21e14ba17..08bd1e694d901 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -647,6 +647,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
       // DW_CFA_GNU_window_save and DW_CFA_AARCH64_negate_ra_state just use the
       // same id but mean different things. The latter is used in AArch64.
       if (Function.getBinaryContext().isAArch64()) {
+        Function.setContainedNegateRAState();
         // The location OpNegateRAState CFIs are needed
         // depends on the order of BasicBlocks, which changes during
         // optimizations. Instead of adding OpNegateRAState CFIs, an annotation
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 0eb0e3aef00d4..80857204ff53c 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -137,7 +137,13 @@ void InsertNegateRAState::fixUnknownStates(BinaryFunction &BF) {
 
 Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) {
   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
-    runOnFunction(BF);
+    if (BF.containedNegateRAState()) {
+      // We can skip functions which did not include negate-ra-state CFIs. This
+      // includes code using pac-ret hardening as well, if the binary is
+      // compiled with `-fno-exceptions -fno-unwind-tables
+      // -fno-asynchronous-unwind-tables`
+      runOnFunction(BF);
+    }
   };
 
   ParallelUtilities::runOnEachFunction(
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index 1b56cd83a2314..9e2ab049a67f5 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -127,7 +127,13 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
 
 Error MarkRAStates::runOnFunctions(BinaryContext &BC) {
   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
-    runOnFunction(BF);
+    if (BF.containedNegateRAState()) {
+      // We can skip functions which did not include negate-ra-state CFIs. This
+      // includes code using pac-ret hardening as well, if the binary is
+      // compiled with `-fno-exceptions -fno-unwind-tables
+      // -fno-asynchronous-unwind-tables`
+      runOnFunction(BF);
+    }
   };
 
   ParallelUtilities::runOnEachFunction(

>From 871de4c9ff53b2c5ea3b19db30b4dc9aa9fcd2de Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 6 Aug 2025 13:58:37 +0000
Subject: [PATCH 13/20] [BOLT] Introduce --disallow-pacret flag

---
 bolt/include/bolt/Utils/CommandLineOpts.h    |  1 +
 bolt/lib/Rewrite/BinaryPassManager.cpp       |  9 +++++++
 bolt/lib/Rewrite/RewriteInstance.cpp         | 11 +++++++++
 bolt/test/AArch64/negate-ra-state-disallow.s | 25 ++++++++++++++++++++
 4 files changed, 46 insertions(+)
 create mode 100644 bolt/test/AArch64/negate-ra-state-disallow.s

diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index a75b6bf720ec4..cad49adf38573 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -72,6 +72,7 @@ extern llvm::cl::opt<std::string> OutputFilename;
 extern llvm::cl::opt<std::string> PerfData;
 extern llvm::cl::opt<bool> PrintCacheMetrics;
 extern llvm::cl::opt<bool> PrintSections;
+extern llvm::cl::opt<bool> DisallowPacret;
 
 // The format to use with -o in aggregation mode (perf2bolt)
 enum ProfileFormatKind { PF_Fdata, PF_YAML };
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 8b412854dc701..4f104ea3ee7c3 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -275,6 +275,15 @@ static cl::opt<bool> ShortenInstructions("shorten-instructions",
                                          cl::desc("shorten instructions"),
                                          cl::init(true),
                                          cl::cat(BoltOptCategory));
+
+// This flag is used to "gate" the negate-ra-state CFI handling.
+// Sometimes, binaries use pac-ret but do not contain negate-ra-state CFIs. That
+// should cause no issues for BOLT.
+cl::opt<bool> DisallowPacret(
+    "disallow-pacret",
+    cl::desc("Disable processing binaries containing negate-ra-state DWARF "
+             "CFIs (e.g. binaries using pac-ret hardening)"),
+    cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace llvm {
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index fe4a23cc01382..97cc87ee884c7 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3491,6 +3491,17 @@ void RewriteInstance::disassembleFunctions() {
       }
     }
 
+    // Check if fillCFIInfoFor removed any OpNegateRAState CFIs from the
+    // function.
+    if (Function.containedNegateRAState()) {
+      if (opts::DisallowPacret) {
+        BC->errs() << "BOLT-ERROR: --disallow-pacret flag was used, but "
+                   << Function.getPrintName()
+                   << " contains .cfi-negate-ra-state.\n";
+        exit(1);
+      }
+    }
+
     // Parse LSDA.
     if (Function.getLSDAAddress() != 0 &&
         !BC->getFragmentsToSkip().count(&Function)) {
diff --git a/bolt/test/AArch64/negate-ra-state-disallow.s b/bolt/test/AArch64/negate-ra-state-disallow.s
new file mode 100644
index 0000000000000..97488be6eb940
--- /dev/null
+++ b/bolt/test/AArch64/negate-ra-state-disallow.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags  %t.o -o %t.exe -Wl,-q
+# RUN: not llvm-bolt %t.exe -o %t.exe.bolt --disallow-pacret 2>&1 | FileCheck %s
+
+# CHECK: BOLT-ERROR: --disallow-pacret flag was used, but foo contains .cfi-negate-ra-state.
+
+  .text
+  .globl  foo
+  .p2align        2
+  .type   foo,@function
+foo:
+  .cfi_startproc
+  hint    #25
+  .cfi_negate_ra_state
+  mov x1, #0
+  hint    #29
+  .cfi_negate_ra_state
+  ret
+  .cfi_endproc
+  .size   foo, .-foo
+
+  .global _start
+  .type _start, %function
+_start:
+  b foo

>From 8c05e2eb277be8f604da07c9eca092d0eab32a92 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Fri, 8 Aug 2025 12:32:56 +0000
Subject: [PATCH 14/20] [BOLT][NFC] Review nits

- remove unused imports
- fix clangd warnings
- rename functions
- improve comments
- reformat lib/Core/Exceptions.cpp
---
 bolt/include/bolt/Core/MCPlusBuilder.h             |  2 +-
 bolt/include/bolt/Passes/InsertNegateRAStatePass.h | 12 +++++++-----
 bolt/lib/Core/Exceptions.cpp                       | 12 ++++++------
 bolt/lib/Passes/InsertNegateRAStatePass.cpp        | 14 ++++++--------
 bolt/lib/Passes/MarkRAStates.cpp                   | 13 +++----------
 5 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 6d41eaf45b03e..efea2f4590ee7 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -77,7 +77,7 @@ class MCPlusBuilder {
     if (*FirstAnnotationOp > OpIndex || Inst.getNumOperands() < OpIndex)
       return std::nullopt;
 
-    auto Op = Inst.begin() + OpIndex;
+    const auto *Op = Inst.begin() + OpIndex;
     const int64_t ImmValue = Op->getImm();
     return extractAnnotationIndex(ImmValue);
   }
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index ce73b5a152d12..2394356401564 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -13,7 +13,6 @@
 #define BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS
 
 #include "bolt/Passes/BinaryPasses.h"
-#include <stack>
 
 namespace llvm {
 namespace bolt {
@@ -31,14 +30,17 @@ class InsertNegateRAState : public BinaryFunctionPass {
 private:
   /// Loops over all instructions and adds OpNegateRAState CFI
   /// after any pointer signing or authenticating instructions,
-  /// which operate on the LR, except fused ptrauth + ret instructions
-  /// (such as RETAA).
+  /// which operate on the LR, except fused pauth + ret instructions
+  /// (such as RETAA). Normal pauth and psign instructions are "special cases",
+  /// meaning they always need an OpNegateRAState CFI after them.
+  /// Fused pauth + ret instructions are not, they work as any other
+  /// instruction.
   /// Returns true, if any OpNegateRAState CFIs were added.
-  bool addNegateRAStateAfterPacOrAuth(BinaryFunction &BF);
+  bool addNegateRAStateAfterPSignOrPAuth(BinaryFunction &BF);
   /// Because states are tracked as MCAnnotations on individual instructions,
   /// newly inserted instructions do not have a state associated with them.
   /// New states are "inherited" from the last known state.
-  void fixUnknownStates(BinaryFunction &BF);
+  void inferUnknownStates(BinaryFunction &BF);
 };
 
 } // namespace bolt
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 08bd1e694d901..eedca368aaaa0 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -648,12 +648,12 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
       // same id but mean different things. The latter is used in AArch64.
       if (Function.getBinaryContext().isAArch64()) {
         Function.setContainedNegateRAState();
-        // The location OpNegateRAState CFIs are needed
-        // depends on the order of BasicBlocks, which changes during
-        // optimizations. Instead of adding OpNegateRAState CFIs, an annotation
-        // is added to the instruction, to mark that the instruction modifies
-        // the RA State. The actual state for instructions are worked out in
-        // MarkRAStates based on these annotations.
+        // Where OpNegateRAState CFIs are needed depends on the order of
+        // BasicBlocks, which changes during optimizations. Instead of adding
+        // OpNegateRAState CFIs, an annotation is added to the instruction, to
+        // mark that the instruction modifies the RA State. The actual state of
+        // each instruction is worked out in MarkRAStates based on these
+        // annotations.
         if (Offset != 0)
           Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state,
                                           Offset);
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 80857204ff53c..8393f2b05a16f 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -14,10 +14,7 @@
 #include "bolt/Passes/InsertNegateRAStatePass.h"
 #include "bolt/Core/BinaryFunction.h"
 #include "bolt/Core/ParallelUtilities.h"
-#include "bolt/Utils/CommandLineOpts.h"
 #include <cstdlib>
-#include <fstream>
-#include <iterator>
 
 using namespace llvm;
 
@@ -38,10 +35,10 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
   }
 
   // If none is inserted, the function doesn't need more work.
-  if (!addNegateRAStateAfterPacOrAuth(BF))
+  if (!addNegateRAStateAfterPSignOrPAuth(BF))
     return;
 
-  fixUnknownStates(BF);
+  inferUnknownStates(BF);
 
   // Support for function splitting:
   // if two consecutive BBs with Signed state are going to end up in different
@@ -53,7 +50,7 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
     // BOLT can generate empty BBs at function splitting which are only used as
     // target labels. We should add the negate-ra-state CFI to the first
     // non-empty BB.
-    auto FirstNonEmpty =
+    auto *FirstNonEmpty =
         std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) {
           // getFirstNonPseudo returns BB.end() if it does not find any
           // Instructions.
@@ -92,7 +89,8 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
   }
 }
 
-bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) {
+bool InsertNegateRAState::addNegateRAStateAfterPSignOrPAuth(
+    BinaryFunction &BF) {
   BinaryContext &BC = BF.getBinaryContext();
   bool FoundAny = false;
   for (BinaryBasicBlock &BB : BF) {
@@ -109,7 +107,7 @@ bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) {
   return FoundAny;
 }
 
-void InsertNegateRAState::fixUnknownStates(BinaryFunction &BF) {
+void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) {
   BinaryContext &BC = BF.getBinaryContext();
   bool FirstIter = true;
   MCInst PrevInst;
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index 9e2ab049a67f5..aacb14fb7dcc9 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -22,12 +22,7 @@
 #include "bolt/Passes/MarkRAStates.h"
 #include "bolt/Core/BinaryFunction.h"
 #include "bolt/Core/ParallelUtilities.h"
-#include "bolt/Utils/CommandLineOpts.h"
 #include <cstdlib>
-#include <fstream>
-#include <iterator>
-
-#include <iostream>
 #include <optional>
 #include <stack>
 
@@ -49,11 +44,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
       if ((BC.MIB->isPSignOnLR(Inst) ||
            (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) &&
           !BC.MIB->hasNegateRAState(Inst)) {
-        // no .cfi_negate_ra_state attached to signing or authenticating instr
-        // means, that this is a function with handwritten assembly, which might
-        // not respect Clang's conventions (e.g. tailcalls are always
-        // authenticated, so functions always start with unsigned RAState when
-        // working with compiler-generated code)
+        // Not all functions have .cfi_negate_ra_state in them. But if one does,
+        // we expect psign/pauth instructions to have the hasNegateRAState
+        // annotation.
         BF.setIgnored();
         BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                   << BF.getPrintName() << "\n";

>From ac7e16cc355d5e029687cea3a8644af1398506e8 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Fri, 8 Aug 2025 12:39:02 +0000
Subject: [PATCH 15/20] [BOLT] Refactor and improve InsertNegateRAStatePass

- separate function splitting code into its own function
- change the iteration looking for state-switches between consecutive
  instructions to only look for changes *inside* FunctionFragments,
  and skip borders between them.
---
 .../bolt/Passes/InsertNegateRAStatePass.h     |  7 ++
 bolt/lib/Passes/InsertNegateRAStatePass.cpp   | 91 ++++++++++---------
 2 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index 2394356401564..0b1654c1380b9 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -41,6 +41,13 @@ class InsertNegateRAState : public BinaryFunctionPass {
   /// newly inserted instructions do not have a state associated with them.
   /// New states are "inherited" from the last known state.
   void inferUnknownStates(BinaryFunction &BF);
+
+  /// Support for function splitting:
+  /// if two consecutive BBs with Signed state are going to end up in different
+  /// functions (so are held by different FunctionFragments), we have to add a
+  /// OpNegateRAState to the beginning of the newly split function, so it starts
+  /// with a Signed state.
+  void coverFunctionFragmentStart(BinaryFunction &BF, FunctionFragment &FF);
 };
 
 } // namespace bolt
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 8393f2b05a16f..8ee2d28d0e379 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -34,57 +34,35 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
     return;
   }
 
-  // If none is inserted, the function doesn't need more work.
-  if (!addNegateRAStateAfterPSignOrPAuth(BF))
-    return;
+  // Attach .cfi_negate_ra_state to the "trivial" cases first.
+  addNegateRAStateAfterPSignOrPAuth(BF);
 
   inferUnknownStates(BF);
 
-  // Support for function splitting:
-  // if two consecutive BBs with Signed state are going to end up in different
-  // functions (so are held by different FunctionFragments), we have to add a
-  // OpNegateRAState to the beginning of the newly split function, so it starts
-  // with a Signed state.
   for (FunctionFragment &FF : BF.getLayout().fragments()) {
-    // Find the first BB in the FF which has Instructions.
-    // BOLT can generate empty BBs at function splitting which are only used as
-    // target labels. We should add the negate-ra-state CFI to the first
-    // non-empty BB.
-    auto *FirstNonEmpty =
-        std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) {
-          // getFirstNonPseudo returns BB.end() if it does not find any
-          // Instructions.
-          return BB->getFirstNonPseudo() != BB->end();
-        });
-    if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) {
-      BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(),
-                           MCCFIInstruction::createNegateRAState(nullptr));
-    }
-  }
-
-  bool FirstIter = true;
-  MCInst PrevInst;
-  for (BinaryBasicBlock &BB : BF) {
-    for (auto It = BB.begin(); It != BB.end(); ++It) {
-
-      MCInst &Inst = *It;
-      if (BC.MIB->isCFI(Inst))
-        continue;
-
-      if (!FirstIter) {
-        // Consecutive instructions with different RAState means we need to add
-        // a OpNegateRAState.
-        if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) ||
-            (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) {
-
-          It = BF.addCFIInstruction(
-              &BB, It, MCCFIInstruction::createNegateRAState(nullptr));
+    coverFunctionFragmentStart(BF, FF);
+    bool FirstIter = true;
+    MCInst PrevInst;
+    // As this pass runs after function splitting, we should only check
+    // consecutive instructions inside FunctionFragments.
+    for (BinaryBasicBlock *BB : FF) {
+      for (auto It = BB->begin(); It != BB->end(); ++It) {
+        MCInst &Inst = *It;
+        if (BC.MIB->isCFI(Inst))
+          continue;
+        if (!FirstIter) {
+          // Consecutive instructions with different RAState means we need to
+          // add an OpNegateRAState.
+          if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) ||
+              (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) {
+            It = BF.addCFIInstruction(
+                BB, It, MCCFIInstruction::createNegateRAState(nullptr));
+          }
+        } else {
+          FirstIter = false;
         }
-
-      } else {
-        FirstIter = false;
+        PrevInst = *It;
       }
-      PrevInst = *It;
     }
   }
 }
@@ -107,6 +85,29 @@ bool InsertNegateRAState::addNegateRAStateAfterPSignOrPAuth(
   return FoundAny;
 }
 
+void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF,
+                                                     FunctionFragment &FF) {
+  BinaryContext &BC = BF.getBinaryContext();
+  if (FF.empty())
+    return;
+  // Find the first BB in the FF which has Instructions.
+  // BOLT can generate empty BBs at function splitting which are only used as
+  // target labels. We should add the negate-ra-state CFI to the first
+  // non-empty BB.
+  auto *FirstNonEmpty =
+      std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) {
+        // getFirstNonPseudo returns BB.end() if it does not find any
+        // Instructions.
+        return BB->getFirstNonPseudo() != BB->end();
+      });
+  // If a function is already split in the input, the first FF can also start
+  // with Signed state. This covers that scenario as well.
+  if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) {
+    BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(),
+                         MCCFIInstruction::createNegateRAState(nullptr));
+  }
+}
+
 void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) {
   BinaryContext &BC = BF.getBinaryContext();
   bool FirstIter = true;

>From e60be8ee64e8d906d49193a9a024aba429f722b6 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 11 Aug 2025 11:20:45 +0000
Subject: [PATCH 16/20] [BOLT] Add function splitting lit test without runtime
 dependency

- Test uses lit and FileCheck to check if OpNegateRAState CFIs are
  generated in the correct location for function-splitting.
- Also fix a bug related to function-splitting.
---
 bolt/lib/Passes/InsertNegateRAStatePass.cpp |  3 +-
 bolt/test/AArch64/pacret-split-funcs.s      | 56 +++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/AArch64/pacret-split-funcs.s

diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 8ee2d28d0e379..60af3485ea4d1 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -102,7 +102,8 @@ void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF,
       });
   // If a function is already split in the input, the first FF can also start
   // with Signed state. This covers that scenario as well.
-  if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) {
+  if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin())) ||
+      BC.MIB->isAuthenticating(*((*FirstNonEmpty)->begin()))) {
     BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(),
                          MCCFIInstruction::createNegateRAState(nullptr));
   }
diff --git a/bolt/test/AArch64/pacret-split-funcs.s b/bolt/test/AArch64/pacret-split-funcs.s
new file mode 100644
index 0000000000000..6582257948c62
--- /dev/null
+++ b/bolt/test/AArch64/pacret-split-funcs.s
@@ -0,0 +1,56 @@
+# Checking that we generate an OpNegateRAState CFI after the split point,
+# when splitting a region with signed RA state.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -o %t %s
+# RUN: %clang %s %cflags -Wl,-q -o %t
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions \
+# RUN: --print-only foo --print-split --print-all 2>&1 | FileCheck %s
+
+# Checking that we don't see any OpNegateRAState CFIs before the insertion pass.
+# CHECK-NOT: OpNegateRAState
+# CHECK: Binary Function "foo" after insert-negate-ra-state-pass
+
+# CHECK:       paciasp
+# CHECK-NEXT:  OpNegateRAState
+
+# CHECK: -------   HOT-COLD SPLIT POINT   -------
+
+# CHECK:         OpNegateRAState
+# CHECK-NEXT:    autiasp
+# CHECK-NEXT:    OpNegateRAState
+# CHECK-NEXT:    ret
+
+# CHECK:         autiasp
+# CHECK-NEXT:    OpNegateRAState
+# CHECK-NEXT:    ret
+
+# End of the insert-negate-ra-state-pass logs
+# CHECK: Binary Function "foo" after finalize-functions
+
+  .text
+  .globl  foo
+  .type foo, %function
+foo:
+.cfi_startproc
+.entry_bb:
+     paciasp
+    .cfi_negate_ra_state     // indicating that paciasp changed the RA state to signed
+# FDATA: 1 foo #.entry_bb# 10
+    cmp x0, #0
+    b.eq .Lcold_bb1
+    autiasp
+    .cfi_negate_ra_state     // indicating that autiasp changed the RA state to unsigned
+    ret
+    .cfi_negate_ra_state     // ret has unsigned RA state, but the next inst (autiasp) has signed RA state
+.Lcold_bb1:
+    autiasp
+    .cfi_negate_ra_state     // indicating that autiasp changed the RA state to unsigned
+    ret
+.cfi_endproc
+  .size foo, .-foo
+
+## Force relocation mode.
+.reloc 0, R_AARCH64_NONE

>From 6e13a3549b7981e8382235829c5d715714ec3ec8 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 11 Aug 2025 13:04:15 +0000
Subject: [PATCH 17/20] [BOLT] Remove unnecessary script, and rewrite unit test
 using it

- the checks that match_dwarf.py helped with can also be done using BOLT
  logs.
- rewrote the negate-ra-state.s test to only rely on logs to verify
  correct OpNegateRAState CFI generation.
---
 bolt/test/AArch64/negate-ra-state.s | 100 +++++++++++++-------
 bolt/test/lit.cfg.py                |   7 --
 bolt/test/match_dwarf.py            | 137 ----------------------------
 3 files changed, 67 insertions(+), 177 deletions(-)
 delete mode 100755 bolt/test/match_dwarf.py

diff --git a/bolt/test/AArch64/negate-ra-state.s b/bolt/test/AArch64/negate-ra-state.s
index 11c511a254c71..65483dfaf28e5 100644
--- a/bolt/test/AArch64/negate-ra-state.s
+++ b/bolt/test/AArch64/negate-ra-state.s
@@ -1,42 +1,76 @@
+# Checking that .cfi_negate_ra_state directives are emitted in the same location as in the input in the case of no optimizations.
+
 # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
 # RUN: %clang %cflags  %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads --print-all | FileCheck %s --check-prefix=CHECK-BOLT
+
+# Check that the negate-ra-state at the start of bar is not discarded.
+# If it were discarded, MarkRAStates would report bar as having inconsistent RAStates.
+# CHECK-BOLT-NOT: BOLT-INFO: inconsistent RAStates in function foo
+# CHECK-BOLT-NOT: BOLT-INFO: inconsistent RAStates in function bar
+
+# Check that OpNegateRAState CFIs are generated correctly.
+# CHECK-BOLT: Binary Function "foo" after insert-negate-ra-state-pass {
+# CHECK-BOLT:         paciasp
+# CHECK-BOLT-NEXT:    OpNegateRAState
+
+# CHECK-BOLT:      DWARF CFI Instructions:
+# CHECK-BOLT-NEXT:     0:  OpNegateRAState
+# CHECK-BOLT-NEXT: End of Function "foo"
+
+# CHECK-BOLT: Binary Function "bar" after insert-negate-ra-state-pass {
+# CHECK-BOLT:         OpNegateRAState
+# CHECK-BOLT-NEXT:    mov     x1, #0x0
+# CHECK-BOLT-NEXT:    mov     x1, #0x1
+# CHECK-BOLT-NEXT:    autiasp
+# CHECK-BOLT-NEXT:    OpNegateRAState
+# CHECK-BOLT-NEXT:    ret
 
-# RUN: llvm-objdump %t.exe -d > %t.exe.dump
-# RUN: llvm-objdump --dwarf=frames %t.exe > %t.exe.dump-dwarf
-# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > %t.match-dwarf.txt
+# CHECK-BOLT:     DWARF CFI Instructions:
+# CHECK-BOLT-NEXT:     0:  OpNegateRAState
+# CHECK-BOLT-NEXT:     1:  OpNegateRAState
+# CHECK-BOLT-NEXT: End of Function "bar"
 
-# RUN: llvm-bolt %t.exe -o %t.exe.bolt
+# End of negate-ra-state insertion logs for foo and bar.
+# CHECK: Binary Function "_start" after insert-negate-ra-state-pass {
 
-# RUN: llvm-objdump %t.exe.bolt -d > %t.exe.bolt.dump
-# RUN: llvm-objdump --dwarf=frames %t.exe.bolt  > %t.exe.bolt.dump-dwarf
-# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > %t.bolt.match-dwarf.txt
+# Check that the functions are in the new .text section
+# RUN: llvm-objdump %t.exe.bolt -d -j .text | FileCheck %s --check-prefix=CHECK-OBJDUMP
+# CHECK-OBJDUMP: <foo>:
+# CHECK-OBJDUMP: <bar>:
 
-# RUN: diff %t.match-dwarf.txt %t.bolt.match-dwarf.txt
 
-	.text
-	.globl	foo
-	.p2align	2
-	.type	foo, at function
+  .text
+  .globl  foo
+  .p2align        2
+  .type   foo, at function
 foo:
-	.cfi_startproc
-	hint	#25
-	.cfi_negate_ra_state
-	sub	sp, sp, #16
-	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
-	.cfi_def_cfa_offset 16
-	str	w0, [sp, #12]
-	ldr	w8, [sp, #12]
-	add	w0, w8, #1
-	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
-	add	sp, sp, #16
-	hint	#29
-	.cfi_negate_ra_state
-	ret
-.Lfunc_end1:
-	.size	foo, .Lfunc_end1-foo
-	.cfi_endproc
-
-	.global _start
-	.type _start, %function
+  .cfi_startproc
+  paciasp
+  .cfi_negate_ra_state
+  mov x1, #0
+  b bar
+  .cfi_endproc
+  .size   foo, .-foo
+
+
+
+  .text
+  .globl  bar
+  .p2align        2
+  .type   bar, at function
+bar:
+  .cfi_startproc
+  .cfi_negate_ra_state    // Indicating that RA is signed from the start of bar.
+  mov x1, #0
+  mov x1, #1
+  autiasp
+  .cfi_negate_ra_state
+  ret
+  .cfi_endproc
+  .size   bar, .-bar
+
+  .global _start
+  .type _start, %function
 _start:
-	b foo
+  b foo
diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py
index 60617cd76f968..0d05229be2bf3 100644
--- a/bolt/test/lit.cfg.py
+++ b/bolt/test/lit.cfg.py
@@ -89,7 +89,6 @@
 config.substitutions.append(("%cxxflags", ""))
 
 link_fdata_cmd = os.path.join(config.test_source_root, "link_fdata.py")
-match_dwarf_cmd = os.path.join(config.test_source_root, "match_dwarf.py")
 
 tool_dirs = [config.llvm_tools_dir, config.test_source_root]
 
@@ -132,12 +131,6 @@
     ToolSubst("llvm-readobj", unresolved="fatal"),
     ToolSubst("llvm-dwp", unresolved="fatal"),
     ToolSubst("split-file", unresolved="fatal"),
-    ToolSubst(
-        "match-dwarf",
-        command=sys.executable,
-        unresolved="fatal",
-        extra_args=[match_dwarf_cmd],
-    ),
 ]
 llvm_config.add_tool_substitutions(tools, tool_dirs)
 
diff --git a/bolt/test/match_dwarf.py b/bolt/test/match_dwarf.py
deleted file mode 100755
index 3d3ab22042d5c..0000000000000
--- a/bolt/test/match_dwarf.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python3
-
-# This tool helps matching dwarf dumps
-# (= the output from running llvm-objdump --dwarf=frames),
-# by address to function names (which are parsed from a normal objdump).
-# The script is used for checking if .cfi_negate_ra_state CFIs
-# are generated by BOLT the same way they are generated by LLVM.
-# The script is called twice in unittests: once with the objdumps of
-# the BOLT input binary, and once with the output binary from BOLT.
-# We output the offsets of .cfi_negate_ra_state instructions from the
-# function's start address to see that BOLT can generate them to the same
-# locations.
-# Because we check the location, this is only useful for testing without
-# optimization flags, so `llvm-bolt input.exe -o output.exe`
-
-
-import argparse
-import subprocess
-import sys
-import re
-
-
-class NameDwarfPair(object):
-    def __init__(self, name, body):
-        self.name = name
-        self.body = body
-        self.finalized = False
-
-    def append(self, body_line):
-        # only store elements into the body until the first whitespace line is encountered.
-        if body_line.isspace():
-            self.finalized = True
-        if not self.finalized:
-            self.body += body_line
-
-    def print(self):
-        print(self.name)
-        print(self.body)
-
-    def parse_negate_offsets(self):
-        """
-        Create a list of locations/offsets of the negate_ra_state CFIs in the
-        dwarf entry. To find offsets for each, we match the DW_CFA_advance_loc
-        entries, and sum up their values.
-        """
-        negate_offsets = []
-        loc = 0
-        # TODO: make sure this is not printed in hex
-        re_advloc = r"DW_CFA_advance_loc: (\d+)"
-
-        for line in self.body.splitlines():
-            # if line matches advance_loc int
-            match = re.search(re_advloc, line)
-            if match:
-                loc += int(match.group(1))
-            if "DW_CFA_AARCH64_negate_ra_state" in line:
-                negate_offsets.append(loc)
-
-        self.negate_offsets = negate_offsets
-
-    def __eq__(self, other):
-        return self.name == other.name and self.negate_offsets == other.negate_offsets
-
-
-def extract_function_addresses(objdump):
-    """
-    Parse and return address-to-name dictionary from objdump file.
-    Function names in the objdump look like this:
-        000123abc <foo>:
-    We create a dict from the addr (000123abc), to the name (foo).
-    """
-    addr_name_dict = dict()
-    re_function = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$")
-    with open(objdump, "r") as f:
-        for line in f.readlines():
-            match = re_function.match(line)
-            if not match:
-                continue
-            m_addr = match.groups()[0]
-            m_name = match.groups()[1]
-            addr_name_dict[int(m_addr, 16)] = m_name
-
-    return addr_name_dict
-
-
-def match_dwarf_to_name(dwarfdump, addr_name_dict):
-    """
-    Parse dwarf dump, and match names to blocks using the dict from the objdump.
-    Return a list of NameDwarfPairs.
-    The matched lines look like this:
-    000123 000456 000789 FDE cie=000000  pc=0123abc...0456def
-    We do not have the function name for this, only the PC range it applies to.
-    We match the pc=0123abc (the start address), and find the matching name from
-    the addr_name_dict.
-    The resultint NameDwarfPair will hold the lines this header applied to, and
-    instead of the header with the addresses, it will just have the function name.
-    """
-    re_address_line = re.compile(r".*pc=([0-9a-fA-F]+)\.\.\.([0-9a-fA-F]+)")
-    with open(dwarfdump, "r") as dw:
-        functions = []
-        for line in dw.readlines():
-            match = re_address_line.match(line)
-            if not match:
-                if len(functions) > 0:
-                    functions[-1].append(line)
-                continue
-            pc_start_address = match.groups()[0]
-            name = addr_name_dict.get(int(pc_start_address, 16))
-            functions.append(NameDwarfPair(name, ""))
-
-        return functions
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("objdump", help="Objdump file")
-    parser.add_argument(
-        "dwarfdump", help="dwarf dump file created with 'llvm-objdump --dwarf=frames'"
-    )
-    parser.add_argument("function", help="Function to search CFIs in.")
-
-    args = parser.parse_args()
-
-    addr_name_dict = extract_function_addresses(args.objdump)
-    functions = match_dwarf_to_name(args.dwarfdump, addr_name_dict)
-
-    for f in functions:
-        if f.name == args.function:
-            f.parse_negate_offsets()
-            print(f.negate_offsets)
-            break
-    else:
-        print(f"{args.function} not found")
-        exit(-1)
-
-
-main()

>From 04452435d046a3749d7345574749c4452776710f Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 11 Aug 2025 12:46:30 +0000
Subject: [PATCH 18/20] [BOLT][NFC] Simplify RAState tracking

- Remove 'Authenticating' and 'Signing' MCAnnotations.
- The same logic can be done using only 'Signed' and 'Unsigned' annotations.
- To check if an instruction is signing or authenticating, we can use
  the PSignOnLR, and PAuthOnLR functions.
---
 bolt/include/bolt/Core/MCPlus.h               |  2 --
 bolt/include/bolt/Core/MCPlusBuilder.h        | 12 --------
 .../bolt/Passes/InsertNegateRAStatePass.h     |  9 ------
 bolt/lib/Core/MCPlusBuilder.cpp               | 21 +-------------
 bolt/lib/Passes/InsertNegateRAStatePass.cpp   | 28 ++-----------------
 bolt/lib/Passes/MarkRAStates.cpp              |  8 ++++--
 6 files changed, 10 insertions(+), 70 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h
index a95bba36c5a6e..8b1f7033ffad1 100644
--- a/bolt/include/bolt/Core/MCPlus.h
+++ b/bolt/include/bolt/Core/MCPlus.h
@@ -72,9 +72,7 @@ class MCAnnotation {
     kLabel,               /// MCSymbol pointing to this instruction.
     kSize,                /// Size of the instruction.
     kDynamicBranch,       /// Jit instruction patched at runtime.
-    kSigning,             /// Inst is a signing instruction (paciasp, etc.).
     kSigned,              /// Inst is in a range where RA is signed.
-    kAuthenticating,      /// Authenticating inst (e.g. autiasp).
     kUnsigned,            /// Inst is in a range where RA is unsigned.
     kRememberState,       /// Inst has rememberState CFI.
     kRestoreState,        /// Inst has restoreState CFI.
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index efea2f4590ee7..6295d4f43d191 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1342,18 +1342,6 @@ class MCPlusBuilder {
   /// Return true if \p Inst has Signed RA annotation.
   bool isRASigned(const MCInst &Inst) const;
 
-  /// Stores RA Signing annotation on \p Inst.
-  void setRASigning(MCInst &Inst) const;
-
-  /// Return true if \p Inst has Signing RA annotation.
-  bool isRASigning(const MCInst &Inst) const;
-
-  /// Stores Authenticating annotation on \p Inst.
-  void setAuthenticating(MCInst &Inst) const;
-
-  /// Return true if \p Inst has Authenticating annotation.
-  bool isAuthenticating(const MCInst &Inst) const;
-
   /// Stores RA Unsigned annotation on \p Inst.
   void setRAUnsigned(MCInst &Inst) const;
 
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index 0b1654c1380b9..836948bf5e9c0 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -28,15 +28,6 @@ class InsertNegateRAState : public BinaryFunctionPass {
   void runOnFunction(BinaryFunction &BF);
 
 private:
-  /// Loops over all instructions and adds OpNegateRAState CFI
-  /// after any pointer signing or authenticating instructions,
-  /// which operate on the LR, except fused pauth + ret instructions
-  /// (such as RETAA). Normal pauth and psign instructions are "special cases",
-  /// meaning they always need an OpNegateRAState CFI after them.
-  /// Fused pauth + ret instructions are not, they work as any other
-  /// instruction.
-  /// Returns true, if any OpNegateRAState CFIs were added.
-  bool addNegateRAStateAfterPSignOrPAuth(BinaryFunction &BF);
   /// Because states are tracked as MCAnnotations on individual instructions,
   /// newly inserted instructions do not have a state associated with them.
   /// New states are "inherited" from the last known state.
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index aa2a784442a5f..5afc14a6e8854 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -185,24 +185,6 @@ bool MCPlusBuilder::isRASigned(const MCInst &Inst) const {
   return hasAnnotation(Inst, MCAnnotation::kSigned);
 }
 
-void MCPlusBuilder::setRASigning(MCInst &Inst) const {
-  assert(!hasAnnotation(Inst, MCAnnotation::kSigning));
-  setAnnotationOpValue(Inst, MCAnnotation::kSigning, true);
-}
-
-bool MCPlusBuilder::isRASigning(const MCInst &Inst) const {
-  return hasAnnotation(Inst, MCAnnotation::kSigning);
-}
-
-void MCPlusBuilder::setAuthenticating(MCInst &Inst) const {
-  assert(!hasAnnotation(Inst, MCAnnotation::kAuthenticating));
-  setAnnotationOpValue(Inst, MCAnnotation::kAuthenticating, true);
-}
-
-bool MCPlusBuilder::isAuthenticating(const MCInst &Inst) const {
-  return hasAnnotation(Inst, MCAnnotation::kAuthenticating);
-}
-
 void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const {
   assert(!hasAnnotation(Inst, MCAnnotation::kUnsigned));
   setAnnotationOpValue(Inst, MCAnnotation::kUnsigned, true);
@@ -213,8 +195,7 @@ bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const {
 }
 
 bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const {
-  return !(isRAUnsigned(Inst) || isRASigned(Inst) || isRASigning(Inst) ||
-           isAuthenticating(Inst));
+  return !(isRAUnsigned(Inst) || isRASigned(Inst));
 }
 
 std::optional<MCLandingPad> MCPlusBuilder::getEHInfo(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 60af3485ea4d1..273944b860e36 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -34,9 +34,6 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
     return;
   }
 
-  // Attach .cfi_negate_ra_state to the "trivial" cases first.
-  addNegateRAStateAfterPSignOrPAuth(BF);
-
   inferUnknownStates(BF);
 
   for (FunctionFragment &FF : BF.getLayout().fragments()) {
@@ -67,24 +64,6 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
   }
 }
 
-bool InsertNegateRAState::addNegateRAStateAfterPSignOrPAuth(
-    BinaryFunction &BF) {
-  BinaryContext &BC = BF.getBinaryContext();
-  bool FoundAny = false;
-  for (BinaryBasicBlock &BB : BF) {
-    for (auto Iter = BB.begin(); Iter != BB.end(); ++Iter) {
-      MCInst &Inst = *Iter;
-      if (BC.MIB->isPSignOnLR(Inst) ||
-          (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) {
-        Iter = BF.addCFIInstruction(
-            &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr));
-        FoundAny = true;
-      }
-    }
-  }
-  return FoundAny;
-}
-
 void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF,
                                                      FunctionFragment &FF) {
   BinaryContext &BC = BF.getBinaryContext();
@@ -102,8 +81,7 @@ void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF,
       });
   // If a function is already split in the input, the first FF can also start
   // with Signed state. This covers that scenario as well.
-  if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin())) ||
-      BC.MIB->isAuthenticating(*((*FirstNonEmpty)->begin()))) {
+  if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) {
     BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(),
                          MCCFIInstruction::createNegateRAState(nullptr));
   }
@@ -121,10 +99,10 @@ void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) {
         continue;
 
       if (!FirstIter && BC.MIB->isRAStateUnknown(Inst)) {
-        if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isRASigning(PrevInst)) {
+        if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isPSignOnLR(PrevInst)) {
           BC.MIB->setRASigned(Inst);
         } else if (BC.MIB->isRAUnsigned(PrevInst) ||
-                   BC.MIB->isAuthenticating(PrevInst)) {
+                   BC.MIB->isPAuthOnLR(PrevInst)) {
           BC.MIB->setRAUnsigned(Inst);
         }
       } else {
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index aacb14fb7dcc9..8ce1b67814608 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -78,7 +78,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
           BF.setIgnored();
           return;
         }
-        BC.MIB->setRASigning(Inst);
+        // The signing instruction itself is unsigned, but the next will be
+        // signed.
+        BC.MIB->setRAUnsigned(Inst);
       } else if (BC.MIB->isPAuthOnLR(Inst)) {
         if (!RAState) {
           // RA authenticating instructions should only follow signed RA state.
@@ -89,7 +91,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
           BF.setIgnored();
           return;
         }
-        BC.MIB->setAuthenticating(Inst);
+        // The authenticating instruction itself is signed, but the next will be
+        // unsigned.
+        BC.MIB->setRASigned(Inst);
       } else if (RAState) {
         BC.MIB->setRASigned(Inst);
       } else {

>From 9a4ff401d18dd203c0ec79c9cc2c8fbe1532f5e2 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 13 Aug 2025 12:44:50 +0000
Subject: [PATCH 19/20] [BOLT] Add negate-ra-state-reorder test

- Test checks that reordering BBs changes the number of
  negate-ra-state CFIs, and their locations.
---
 bolt/test/AArch64/negate-ra-state-reorder.s | 73 +++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 bolt/test/AArch64/negate-ra-state-reorder.s

diff --git a/bolt/test/AArch64/negate-ra-state-reorder.s b/bolt/test/AArch64/negate-ra-state-reorder.s
new file mode 100644
index 0000000000000..2659f75aff9c9
--- /dev/null
+++ b/bolt/test/AArch64/negate-ra-state-reorder.s
@@ -0,0 +1,73 @@
+# Checking that after reordering BasicBlocks, the generated OpNegateRAState instructions
+# are placed where the RA state is different between two consecutive instructions.
+# This case demonstrates that the input might have a different number of CFIs than the output:
+# the input has 4, but the output only has 3.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags  %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads --reorder-blocks=reverse \
+# RUN: --print-cfg --print-after-lowering --print-only foo | FileCheck %s
+
+# Check that the reordering succeeded.
+# CHECK: Binary Function "foo" after building cfg {
+# CHECK: BB Layout   : .LBB00, .Ltmp2, .Ltmp0, .Ltmp1
+# CHECK: Binary Function "foo" after inst-lowering {
+# CHECK: BB Layout   : .LBB00, .Ltmp1, .Ltmp0, .Ltmp2
+
+
+# Check the generated CFIs.
+# CHECK:         OpNegateRAState
+# CHECK-NEXT:    mov     x2, #0x6
+
+# CHECK:         autiasp
+# CHECK-NEXT:    OpNegateRAState
+# CHECK-NEXT:    ret
+
+# CHECK:         paciasp
+# CHECK-NEXT:    OpNegateRAState
+
+# CHECK:         DWARF CFI Instructions:
+# CHECK-NEXT:        0:  OpNegateRAState
+# CHECK-NEXT:        1:  OpNegateRAState
+# CHECK-NEXT:        2:  OpNegateRAState
+# CHECK-NEXT:    End of Function "foo"
+
+  .text
+  .globl  foo
+  .p2align        2
+  .type   foo, at function
+foo:
+  .cfi_startproc
+  // RA is unsigned
+  mov x1, #0
+  mov x1, #1
+  mov x1, #2
+  // jump into the signed "range"
+  b .Lmiddle
+.Lback:
+// sign RA
+  paciasp
+  .cfi_negate_ra_state
+  mov x2, #3
+  mov x2, #4
+  // skip unsigned instructions
+  b .Lcont
+  .cfi_negate_ra_state
+.Lmiddle:
+// RA is unsigned
+  mov x4, #5
+  b .Lback
+  .cfi_negate_ra_state
+.Lcont:
+// continue in signed state
+  mov x2, #6
+  autiasp
+  .cfi_negate_ra_state
+  ret
+  .cfi_endproc
+  .size   foo, .-foo
+
+  .global _start
+  .type _start, %function
+_start:
+  b foo

>From 5d500d52b5feb3a8d40a7dd6416c9a50f942ff83 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Thu, 14 Aug 2025 11:36:14 +0000
Subject: [PATCH 20/20] [BOLT] Print stats in MarkRAStates, InsertNegateRAState

- print statistics on the output about functions being ignored in
  MarkRAStates, and on rewriting OpNegateRAState in
  InsertNegateRAStatePass.
- improve error messages in MarkRAStates
---
 .../bolt/Passes/InsertNegateRAStatePass.h     |  1 +
 bolt/include/bolt/Passes/MarkRAStates.h       |  5 ++-
 bolt/lib/Passes/InsertNegateRAStatePass.cpp   |  8 +++-
 bolt/lib/Passes/MarkRAStates.cpp              | 44 +++++++++++--------
 bolt/test/AArch64/negate-ra-state-incorrect.s | 11 ++---
 5 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index 836948bf5e9c0..9acd28424c59a 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -28,6 +28,7 @@ class InsertNegateRAState : public BinaryFunctionPass {
   void runOnFunction(BinaryFunction &BF);
 
 private:
+  uint64_t FunctionsModified{0};
   /// Because states are tracked as MCAnnotations on individual instructions,
   /// newly inserted instructions do not have a state associated with them.
   /// New states are "inherited" from the last known state.
diff --git a/bolt/include/bolt/Passes/MarkRAStates.h b/bolt/include/bolt/Passes/MarkRAStates.h
index e7a49f813b6a7..8870540aa7e9e 100644
--- a/bolt/include/bolt/Passes/MarkRAStates.h
+++ b/bolt/include/bolt/Passes/MarkRAStates.h
@@ -25,7 +25,10 @@ class MarkRAStates : public BinaryFunctionPass {
 
   /// Pass entry point
   Error runOnFunctions(BinaryContext &BC) override;
-  void runOnFunction(BinaryFunction &BF);
+  bool runOnFunction(BinaryFunction &BF);
+
+private:
+  uint64_t FunctionsIgnored{0};
 };
 
 } // namespace bolt
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 273944b860e36..7b93a0ca2b251 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -115,11 +115,12 @@ void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) {
 
 Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) {
   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
-    if (BF.containedNegateRAState()) {
+    if (BF.containedNegateRAState() && !BF.isIgnored()) {
       // We can skip functions which did not include negate-ra-state CFIs. This
       // includes code using pac-ret hardening as well, if the binary is
       // compiled with `-fno-exceptions -fno-unwind-tables
       // -fno-asynchronous-unwind-tables`
+      FunctionsModified++;
       runOnFunction(BF);
     }
   };
@@ -128,6 +129,11 @@ Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) {
       BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr,
       "InsertNegateRAStatePass");
 
+  BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in "
+            << FunctionsModified << " out of " << BC.getBinaryFunctions().size()
+            << " functions "
+            << format("(%.2lf%%).\n", (100.0 * FunctionsModified) /
+                                          BC.getBinaryFunctions().size());
   return Error::success();
 }
 
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index 8ce1b67814608..2e766a6ed9be6 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -31,10 +31,7 @@ using namespace llvm;
 namespace llvm {
 namespace bolt {
 
-void MarkRAStates::runOnFunction(BinaryFunction &BF) {
-
-  if (BF.isIgnored())
-    return;
+bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
 
   BinaryContext &BC = BF.getBinaryContext();
 
@@ -49,10 +46,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
         // annotation.
         BF.setIgnored();
         BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
-                  << BF.getPrintName() << "\n";
-        BC.outs()
-            << "BOLT-INFO: ptr sign/auth inst without .cfi_negate_ra_state\n";
-        return;
+                  << BF.getPrintName()
+                  << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
+        return false;
       }
     }
   }
@@ -72,11 +68,10 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
         if (RAState) {
           // RA signing instructions should only follow unsigned RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
-                    << BF.getPrintName() << "\n";
-          BC.outs() << "BOLT-INFO: ptr signing inst encountered in Signed RA "
-                       "state.\n";
+                    << BF.getPrintName()
+                    << ": ptr signing inst encountered in Signed RA state\n";
           BF.setIgnored();
-          return;
+          return false;
         }
         // The signing instruction itself is unsigned, but the next will be
         // signed.
@@ -85,11 +80,11 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
         if (!RAState) {
           // RA authenticating instructions should only follow signed RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
-                    << BF.getPrintName() << "\n";
-          BC.outs() << "BOLT-INFO: ptr authenticating inst encountered in "
-                       "Unsigned RA state.\n";
+                    << BF.getPrintName()
+                    << ": ptr authenticating inst encountered in Unsigned RA "
+                       "state\n";
           BF.setIgnored();
-          return;
+          return false;
         }
         // The authenticating instruction itself is signed, but the next will be
         // unsigned.
@@ -120,16 +115,19 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
       }
     }
   }
+  return true;
 }
 
 Error MarkRAStates::runOnFunctions(BinaryContext &BC) {
   ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
-    if (BF.containedNegateRAState()) {
+    if (BF.containedNegateRAState() && !BF.isIgnored()) {
       // We can skip functions which did not include negate-ra-state CFIs. This
       // includes code using pac-ret hardening as well, if the binary is
       // compiled with `-fno-exceptions -fno-unwind-tables
       // -fno-asynchronous-unwind-tables`
-      runOnFunction(BF);
+      if (!runOnFunction(BF)) {
+        FunctionsIgnored++;
+      }
     }
   };
 
@@ -137,6 +135,16 @@ Error MarkRAStates::runOnFunctions(BinaryContext &BC) {
       BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr,
       "MarkRAStates");
 
+  int Total = llvm::count_if(
+      BC.getBinaryFunctions(),
+      [&](std::pair<const unsigned long, BinaryFunction> &P) {
+        return P.second.containedNegateRAState() && !P.second.isIgnored();
+      });
+  BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total
+            << " functions. Ignored " << FunctionsIgnored << " functions "
+            << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+            << " because of CFI inconsistencies.\n";
+
   return Error::success();
 }
 
diff --git a/bolt/test/AArch64/negate-ra-state-incorrect.s b/bolt/test/AArch64/negate-ra-state-incorrect.s
index ad922d83c6d61..14d2c384a877d 100644
--- a/bolt/test/AArch64/negate-ra-state-incorrect.s
+++ b/bolt/test/AArch64/negate-ra-state-incorrect.s
@@ -10,14 +10,9 @@
 # RUN: %clang %cflags  %t.o -o %t.exe -Wl,-q
 # RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads | FileCheck %s --check-prefix=CHECK-BOLT
 
-# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function foo
-# CHECK-BOLT-NEXT: BOLT-INFO: ptr authenticating inst encountered in Unsigned RA state.
-
-# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function bar
-# CHECK-BOLT-NEXT: BOLT-INFO: ptr signing inst encountered in Signed RA state
-
-# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function baz
-# CHECK-BOLT-NEXT: BOLT-INFO: ptr sign/auth inst without .cfi_negate_ra_state
+# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function foo: ptr authenticating inst encountered in Unsigned RA state
+# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function bar: ptr signing inst encountered in Signed RA state
+# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function baz: ptr sign/auth inst without .cfi_negate_ra_state
 
 # Check that the incorrect functions got ignored, so they are not in the new .text section
 # RUN: llvm-objdump %t.exe.bolt -d -j .text | FileCheck %s --check-prefix=CHECK-OBJDUMP



More information about the llvm-commits mailing list