[llvm] [BOLT][AArch64] Handle OpNegateRAState to enable optimizing binaries with pac-ret hardening (PR #120064)

Gergely Bálint via llvm-commits llvm-commits at lists.llvm.org
Mon May 5 06:09:39 PDT 2025


https://github.com/bgergely0 updated https://github.com/llvm/llvm-project/pull/120064

>From ebd11601de85e062a2efab37df3b007367247ec6 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Tue, 19 Nov 2024 09:43:25 +0100
Subject: [PATCH 01/15] [BOLT] Recognize paciasp and autiasp instructions

---
 bolt/include/bolt/Core/MCPlusBuilder.h           | 7 +++++++
 bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 6 ++++++
 2 files changed, 13 insertions(+)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index cf37a984da93f..191f3e86ef7be 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -726,6 +726,13 @@ class MCPlusBuilder {
     llvm_unreachable("not implemented");
     return false;
   }
+  virtual bool isPAuth(MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+  }
+
+  virtual bool isPSign(MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+  }
 
   virtual bool isCleanRegXOR(const MCInst &Inst) const {
     llvm_unreachable("not implemented");
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index e00d6a18b0f6c..dcbd82b479549 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -676,6 +676,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
     return false;
   }
+  bool isPAuth(MCInst &Inst) const override {
+    return Inst.getOpcode() == AArch64::AUTIASP;
+  }
+  bool isPSign(MCInst &Inst) const override {
+    return Inst.getOpcode() == AArch64::PACIASP;
+  }
 
   bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From,
                       MCPhysReg &To) const override {

>From 79255b99bfa6aba3fc0dc23c7a5fab6be7905372 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 16 Dec 2024 10:45:41 +0100
Subject: [PATCH 02/15] [BOLT] Support for OpNegateRAState

- save OpNegateRAState, RememberState and RestoreState locations
  when parsing input
- determine the RA states from these before other optimizations
  in MarkRAStates Pass
- after optimizations, we can insert OpNegateRAState at state
  boundaries and other needed locations (e.g. split functions)
  in InsertNegateRAStatePass
---
 bolt/include/bolt/Core/BinaryFunction.h       |  46 ++++++
 bolt/include/bolt/Core/MCPlus.h               |  10 +-
 bolt/include/bolt/Core/MCPlusBuilder.h        |  59 +++++++
 .../bolt/Passes/InsertNegateRAStatePass.h     |  25 +++
 bolt/include/bolt/Passes/MarkRAStates.h       |  22 +++
 bolt/lib/Core/BinaryBasicBlock.cpp            |   6 +-
 bolt/lib/Core/BinaryFunction.cpp              |   1 +
 bolt/lib/Core/Exceptions.cpp                  |  20 ++-
 bolt/lib/Core/MCPlusBuilder.cpp               |  86 ++++++++++
 bolt/lib/Passes/CMakeLists.txt                |   2 +
 bolt/lib/Passes/InsertNegateRAStatePass.cpp   | 153 ++++++++++++++++++
 bolt/lib/Passes/MarkRAStates.cpp              | 122 ++++++++++++++
 bolt/lib/Rewrite/BinaryPassManager.cpp        |   6 +
 13 files changed, 554 insertions(+), 4 deletions(-)
 create mode 100644 bolt/include/bolt/Passes/InsertNegateRAStatePass.h
 create mode 100644 bolt/include/bolt/Passes/MarkRAStates.h
 create mode 100644 bolt/lib/Passes/InsertNegateRAStatePass.cpp
 create mode 100644 bolt/lib/Passes/MarkRAStates.cpp

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a52998564ee1b..a11fd2dc8976e 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -1621,6 +1621,52 @@ class BinaryFunction {
 
   void setHasInferredProfile(bool Inferred) { HasInferredProfile = Inferred; }
 
+  /// Find corrected offset the same way addCFIInstruction does it to skip NOPs.
+  std::optional<uint64_t> getCorrectedCFIOffset(uint64_t Offset) {
+    assert(!Instructions.empty());
+    auto I = Instructions.lower_bound(Offset);
+    if (Offset == getSize()) {
+      assert(I == Instructions.end() && "unexpected iterator value");
+      // Sometimes compiler issues restore_state after all instructions
+      // in the function (even after nop).
+      --I;
+      Offset = I->first;
+    }
+    assert(I->first == Offset && "CFI pointing to unknown instruction");
+    if (I == Instructions.begin()) {
+      return {};
+    }
+
+    --I;
+    while (I != Instructions.begin() && BC.MIB->isNoop(I->second)) {
+      Offset = I->first;
+      --I;
+    }
+    return Offset;
+  }
+
+  void setInstModifiesRAState(uint8_t CFIOpcode, uint64_t Offset) {
+    std::optional<uint64_t> CorrectedOffset = getCorrectedCFIOffset(Offset);
+    if (CorrectedOffset) {
+      auto I = Instructions.lower_bound(*CorrectedOffset);
+      I--;
+
+      switch (CFIOpcode) {
+      case dwarf::DW_CFA_GNU_window_save:
+        BC.MIB->setNegateRAState(I->second);
+        break;
+      case dwarf::DW_CFA_remember_state:
+        BC.MIB->setRememberState(I->second);
+        break;
+      case dwarf::DW_CFA_restore_state:
+        BC.MIB->setRestoreState(I->second);
+        break;
+      default:
+        assert(0 && "CFI Opcode not covered by function");
+      }
+    }
+  }
+
   void addCFIInstruction(uint64_t Offset, MCCFIInstruction &&Inst) {
     assert(!Instructions.empty());
 
diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h
index 601d709712864..78387ad8f5b98 100644
--- a/bolt/include/bolt/Core/MCPlus.h
+++ b/bolt/include/bolt/Core/MCPlus.h
@@ -72,7 +72,15 @@ class MCAnnotation {
     kLabel,               /// MCSymbol pointing to this instruction.
     kSize,                /// Size of the instruction.
     kDynamicBranch,       /// Jit instruction patched at runtime.
-    kGeneric              /// First generic annotation.
+    kUnkownSign,          /// Signed state not determined yet
+    kSigning,             /// Inst is a signing instruction (paciasp, etc.)
+    kSigned,              /// Inst is in a range where RA is signed
+    kAuthenticating,      /// Authenticating inst (e.g. autiasp)
+    kUnsigned,            /// Inst is in a range where RA is unsigned
+    kRememberState,       /// Inst has rememberState CFI
+    kRestoreState,        /// Inst has restoreState CFI
+    kNegateState,         /// Inst has OpNegateRAState CFI
+    kGeneric,             /// First generic annotation.
   };
 
   virtual void print(raw_ostream &OS) const = 0;
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 191f3e86ef7be..a2bb6be0b8346 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -67,6 +67,20 @@ class MCPlusBuilder {
 public:
   using AllocatorIdTy = uint16_t;
 
+  std::optional<int64_t> getAnnotationAtOpIndex(const MCInst &Inst,
+                                                unsigned OpIndex) const {
+    std::optional<unsigned> FirstAnnotationOp = getFirstAnnotationOpIndex(Inst);
+    if (!FirstAnnotationOp)
+      return std::nullopt;
+
+    if (*FirstAnnotationOp > OpIndex || Inst.getNumOperands() < OpIndex)
+      return std::nullopt;
+
+    auto Op = Inst.begin() + OpIndex;
+    const int64_t ImmValue = Op->getImm();
+    return extractAnnotationIndex(ImmValue);
+  }
+
 private:
   /// A struct that represents a single annotation allocator
   struct AnnotationAllocator {
@@ -1207,6 +1221,51 @@ class MCPlusBuilder {
   /// Return true if the instruction is a tail call.
   bool isTailCall(const MCInst &Inst) const;
 
+  /// Stores NegateRAState annotation on Inst.
+  void setNegateRAState(MCInst &Inst) const;
+
+  /// Return true if Inst has NegateRAState annotation.
+  bool hasNegateRAState(const MCInst &Inst) const;
+
+  /// Sets RememberState annotation on Inst.
+  void setRememberState(MCInst &Inst) const;
+
+  /// Return true if Inst has RememberState annotation.
+  bool hasRememberState(const MCInst &Inst) const;
+
+  /// Stores RestoreState annotation on Inst.
+  void setRestoreState(MCInst &Inst) const;
+
+  /// Return true if Inst has RestoreState annotation.
+  bool hasRestoreState(const MCInst &Inst) const;
+
+  /// Stores RA Signed annotation on Inst.
+  void setRASigned(MCInst &Inst) const;
+
+  /// Return true if Inst has Signed RA annotation.
+  bool isRASigned(const MCInst &Inst) const;
+
+  /// Stores RA Signing annotation on Inst.
+  void setRASigning(MCInst &Inst) const;
+
+  /// Return true if Inst has Signing RA annotation.
+  bool isRASigning(const MCInst &Inst) const;
+
+  /// Stores Authenticating annotation on Inst.
+  void setAuthenticating(MCInst &Inst) const;
+
+  /// Return true if Inst has Authenticating annotation.
+  bool isAuthenticating(const MCInst &Inst) const;
+
+  /// Stores RA Unsigned annotation on Inst.
+  void setRAUnsigned(MCInst &Inst) const;
+
+  /// Return true if Inst has Unsigned RA annotation.
+  bool isRAUnsigned(const MCInst &Inst) const;
+
+  /// Return true if Inst doesn't have any annotation related to RA state.
+  bool isRAStateUnknown(const MCInst &Inst) const;
+
   /// Return true if the instruction is a call with an exception handling info.
   virtual bool isInvoke(const MCInst &Inst) const {
     return isCall(Inst) && getEHInfo(Inst);
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
new file mode 100644
index 0000000000000..8cf08add9402a
--- /dev/null
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -0,0 +1,25 @@
+#ifndef BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS
+#define BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS
+
+#include "bolt/Passes/BinaryPasses.h"
+#include <stack>
+
+namespace llvm {
+namespace bolt {
+
+class InsertNegateRAState : public BinaryFunctionPass {
+public:
+  explicit InsertNegateRAState() : BinaryFunctionPass(false) {}
+
+  const char *getName() const override { return "insert-negate-ra-state-pass"; }
+
+  /// Pass entry point
+  Error runOnFunctions(BinaryContext &BC) override;
+  void runOnFunction(BinaryFunction &BF);
+  bool addNegateRAStateAfterPacOrAuth(BinaryFunction &BF);
+  void fixUnknownStates(BinaryFunction &BF);
+};
+
+} // namespace bolt
+} // namespace llvm
+#endif
diff --git a/bolt/include/bolt/Passes/MarkRAStates.h b/bolt/include/bolt/Passes/MarkRAStates.h
new file mode 100644
index 0000000000000..3cbd6044683da
--- /dev/null
+++ b/bolt/include/bolt/Passes/MarkRAStates.h
@@ -0,0 +1,22 @@
+#ifndef BOLT_PASSES_MARK_RA_STATES
+#define BOLT_PASSES_MARK_RA_STATES
+
+#include "bolt/Passes/BinaryPasses.h"
+
+namespace llvm {
+namespace bolt {
+
+class MarkRAStates : public BinaryFunctionPass {
+public:
+  explicit MarkRAStates() : BinaryFunctionPass(false) {}
+
+  const char *getName() const override { return "mark-ra-states"; }
+
+  /// Pass entry point
+  Error runOnFunctions(BinaryContext &BC) override;
+  void runOnFunction(BinaryFunction &BF);
+};
+
+} // namespace bolt
+} // namespace llvm
+#endif
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index 311d5c15b8dca..22126eed67a6c 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -201,7 +201,11 @@ int32_t BinaryBasicBlock::getCFIStateAtInstr(const MCInst *Instr) const {
       InstrSeen = (&Inst == Instr);
       continue;
     }
-    if (Function->getBinaryContext().MIB->isCFI(Inst)) {
+    // Ignoring OpNegateRAState CFIs here, as they don't have a "State"
+    // number associated with them.
+    if (Function->getBinaryContext().MIB->isCFI(Inst) &&
+        (Function->getCFIFor(Inst)->getOperation() !=
+         MCCFIInstruction::OpNegateRAState)) {
       LastCFI = &Inst;
       break;
     }
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 9773e21aa7522..e2540a8077c9a 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -2788,6 +2788,7 @@ struct CFISnapshot {
   void advanceTo(int32_t State) {
     for (int32_t I = CurState, E = State; I != E; ++I) {
       const MCCFIInstruction &Instr = FDE[I];
+      assert(Instr.getOperation() != MCCFIInstruction::OpNegateRAState);
       if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
         update(Instr, I);
         continue;
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 0b2e63b8ca6a7..d38b37bcb3c61 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -568,10 +568,21 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
     case DW_CFA_remember_state:
       Function.addCFIInstruction(
           Offset, MCCFIInstruction::createRememberState(nullptr));
+
+      if (Function.getBinaryContext().isAArch64())
+        // Support for pointer authentication:
+        // We need to annotate instructions that modify the RA State, to work
+        // out the state of each instruction in MarkRAStates Pass.
+        Function.setInstModifiesRAState(DW_CFA_remember_state, Offset);
       break;
     case DW_CFA_restore_state:
       Function.addCFIInstruction(Offset,
                                  MCCFIInstruction::createRestoreState(nullptr));
+      if (Function.getBinaryContext().isAArch64())
+        // Support for pointer authentication:
+        // We need to annotate instructions that modify the RA State, to work
+        // out the state of each instruction in MarkRAStates Pass.
+        Function.setInstModifiesRAState(DW_CFA_restore_state, Offset);
       break;
     case DW_CFA_def_cfa:
       Function.addCFIInstruction(
@@ -632,8 +643,13 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
       // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
       // id but mean different things. The latter is used in AArch64.
       if (Function.getBinaryContext().isAArch64()) {
-        Function.addCFIInstruction(
-            Offset, MCCFIInstruction::createNegateRAState(nullptr));
+        // Not adding OpNegateRAState since the location they are needed
+        // depends on the order of BasicBlocks, which changes during
+        // optimizations. Instead, an annotation is added to the instruction, to
+        // mark that the instruction modifies the RA State. The actual state for
+        // instructions are worked out in MarkRAStates based on these
+        // annotations.
+        Function.setInstModifiesRAState(DW_CFA_GNU_window_save, Offset);
         break;
       }
       if (opts::Verbosity >= 1)
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index 7752079b61538..cd7b8bf0efa75 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -147,6 +147,92 @@ bool MCPlusBuilder::isTailCall(const MCInst &Inst) const {
   return false;
 }
 
+void MCPlusBuilder::setNegateRAState(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kNegateState));
+  setAnnotationOpValue(Inst, MCAnnotation::kNegateState, true);
+}
+
+bool MCPlusBuilder::hasNegateRAState(const MCInst &Inst) const {
+  if (hasAnnotation(Inst, MCAnnotation::kNegateState))
+    return true;
+  return false;
+}
+
+void MCPlusBuilder::setRememberState(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kRememberState));
+  setAnnotationOpValue(Inst, MCAnnotation::kRememberState, true);
+}
+
+bool MCPlusBuilder::hasRememberState(const MCInst &Inst) const {
+  if (hasAnnotation(Inst, MCAnnotation::kRememberState))
+    return true;
+  return false;
+}
+
+void MCPlusBuilder::setRestoreState(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kRestoreState));
+  setAnnotationOpValue(Inst, MCAnnotation::kRestoreState, true);
+}
+
+bool MCPlusBuilder::hasRestoreState(const MCInst &Inst) const {
+  if (hasAnnotation(Inst, MCAnnotation::kRestoreState))
+    return true;
+  return false;
+}
+
+void MCPlusBuilder::setRASigned(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kSigned));
+  setAnnotationOpValue(Inst, MCAnnotation::kSigned, true);
+}
+
+bool MCPlusBuilder::isRASigned(const MCInst &Inst) const {
+  if (hasAnnotation(Inst, MCAnnotation::kSigned))
+    return true;
+  return false;
+}
+
+void MCPlusBuilder::setRASigning(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kSigning));
+  setAnnotationOpValue(Inst, MCAnnotation::kSigning, true);
+}
+
+bool MCPlusBuilder::isRASigning(const MCInst &Inst) const {
+  if (hasAnnotation(Inst, MCAnnotation::kSigning))
+    return true;
+  return false;
+}
+
+void MCPlusBuilder::setAuthenticating(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kAuthenticating));
+  setAnnotationOpValue(Inst, MCAnnotation::kAuthenticating, true);
+}
+
+bool MCPlusBuilder::isAuthenticating(const MCInst &Inst) const {
+  if (hasAnnotation(Inst, MCAnnotation::kAuthenticating))
+    return true;
+  return false;
+}
+
+void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const {
+  assert(!hasAnnotation(Inst, MCAnnotation::kUnsigned));
+  setAnnotationOpValue(Inst, MCAnnotation::kUnsigned, true);
+}
+
+bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const {
+  if (hasAnnotation(Inst, MCAnnotation::kUnsigned))
+    return true;
+  return false;
+}
+
+bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const {
+  if (hasAnnotation(Inst, MCAnnotation::kUnsigned) ||
+      hasAnnotation(Inst, MCAnnotation::kSigned) ||
+      hasAnnotation(Inst, MCAnnotation::kSigning) ||
+      hasAnnotation(Inst, MCAnnotation::kAuthenticating))
+    return false;
+  return true;
+}
+
 std::optional<MCLandingPad> MCPlusBuilder::getEHInfo(const MCInst &Inst) const {
   if (!isCall(Inst))
     return std::nullopt;
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index 77d2bb9c2bcb5..d7519518f186f 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -17,12 +17,14 @@ add_llvm_library(LLVMBOLTPasses
   IdenticalCodeFolding.cpp
   IndirectCallPromotion.cpp
   Inliner.cpp
+  InsertNegateRAStatePass.cpp
   Instrumentation.cpp
   JTFootprintReduction.cpp
   LongJmp.cpp
   LoopInversionPass.cpp
   LivenessAnalysis.cpp
   MCF.cpp
+  MarkRAStates.cpp
   PatchEntries.cpp
   PAuthGadgetScanner.cpp
   PettisAndHansen.cpp
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
new file mode 100644
index 0000000000000..b0945e4065f03
--- /dev/null
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -0,0 +1,153 @@
+//===- bolt/Passes/InsertNegateRAStatePass.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the InsertNegateRAStatePass class. It inserts
+// OpNegateRAState CFIs at places where the RA states of two consecutive
+// instructions differ.
+//
+//===----------------------------------------------------------------------===//
+#include "bolt/Passes/InsertNegateRAStatePass.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+
+using namespace llvm;
+
+namespace llvm {
+namespace bolt {
+
+void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
+  BinaryContext &BC = BF.getBinaryContext();
+
+  if (BF.getState() == BinaryFunction::State::Empty) {
+    return;
+  }
+
+  if (BF.getState() != BinaryFunction::State::CFG &&
+      BF.getState() != BinaryFunction::State::CFG_Finalized) {
+    BC.outs() << "BOLT-INFO: No CFG for " << BF.getPrintName()
+              << " in InsertNegateRAStatePass\n";
+    return;
+  }
+
+  // If none is inserted, the function doesn't need more work.
+  if (!addNegateRAStateAfterPacOrAuth(BF)) {
+    BC.outs() << "BOLT-INFO: no pacret found.\n";
+    return;
+  }
+
+  fixUnknownStates(BF);
+
+  bool FirstIter = true;
+  MCInst PrevInst;
+  BinaryBasicBlock *PrevBB = nullptr;
+  auto *Begin = BF.getLayout().block_begin();
+  auto *End = BF.getLayout().block_end();
+  for (auto *BB = Begin; BB != End; BB++) {
+
+    // Support for function splitting:
+    // if two consecutive BBs are going to end up in different functions,
+    // we have to negate the RA State, so the new function starts with a Signed
+    // state.
+    if (PrevBB != nullptr &&
+        PrevBB->getFragmentNum() != (*BB)->getFragmentNum() &&
+        BC.MIB->isRASigned(*((*BB)->begin()))) {
+      BF.addCFIInstruction(*BB, (*BB)->begin(),
+                           MCCFIInstruction::createNegateRAState(nullptr));
+    }
+
+    for (auto It = (*BB)->begin(); It != (*BB)->end(); ++It) {
+
+      MCInst &Inst = *It;
+      if (BC.MIB->isCFI(Inst))
+        continue;
+
+      if (!FirstIter) {
+        if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) ||
+            (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) {
+
+          It = BF.addCFIInstruction(
+              *BB, It, MCCFIInstruction::createNegateRAState(nullptr));
+        }
+
+      } else {
+        FirstIter = false;
+      }
+      PrevInst = Inst;
+    }
+    PrevBB = *BB;
+  }
+}
+
+bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) {
+  BinaryContext &BC = BF.getBinaryContext();
+  bool FoundAny = false;
+  for (BinaryBasicBlock &BB : BF) {
+    for (auto Iter = BB.begin(); Iter != BB.end(); ++Iter) {
+      MCInst &Inst = *Iter;
+      if (BC.MIB->isPSign(Inst)) {
+        Iter = BF.addCFIInstruction(
+            &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr));
+        FoundAny = true;
+      }
+
+      if (BC.MIB->isPAuth(Inst)) {
+        Iter = BF.addCFIInstruction(
+            &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr));
+        FoundAny = true;
+      }
+    }
+  }
+  return FoundAny;
+}
+
+void InsertNegateRAState::fixUnknownStates(BinaryFunction &BF) {
+  BinaryContext &BC = BF.getBinaryContext();
+  bool FirstIter = true;
+  MCInst PrevInst;
+  for (auto BBIt = BF.begin(); BBIt != BF.end(); ++BBIt) {
+    BinaryBasicBlock &BB = *BBIt;
+
+    for (auto It = BB.begin(); It != BB.end(); ++It) {
+
+      MCInst &Inst = *It;
+      if (BC.MIB->isCFI(Inst))
+        continue;
+
+      if (!FirstIter && BC.MIB->isRAStateUnknown(Inst)) {
+        if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isRASigning(PrevInst)) {
+          BC.MIB->setRASigned(Inst);
+        } else if (BC.MIB->isRAUnsigned(PrevInst) ||
+                   BC.MIB->isAuthenticating(PrevInst)) {
+          BC.MIB->setRAUnsigned(Inst);
+        }
+      } else {
+        FirstIter = false;
+      }
+      PrevInst = Inst;
+    }
+  }
+}
+
+Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) {
+  ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
+    runOnFunction(BF);
+  };
+
+  ParallelUtilities::runOnEachFunction(
+      BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr,
+      "InsertNegateRAStatePass");
+
+  return Error::success();
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
new file mode 100644
index 0000000000000..03affab2f2154
--- /dev/null
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -0,0 +1,122 @@
+//===- bolt/Passes/MarkRAStates.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MarkRAStates class.
+// Three CFIs have an influence on the RA State of an instruction:
+// - NegateRAState flips the RA State,
+// - RememberState pushes the RA State to a stack,
+// - RestoreState pops the RA State from the stack.
+// These were saved as MCAnnotations on instructions they refer to at CFI
+// reading (in CFIReaderWriter::fillCFIInfoFor). In this pass, we can work out
+// the RA State of each instruction, and save it as new MCAnnotations. The new
+// annotations are Signing, Signed, Authenticating and Unsigned. After
+// optimizations, .cfi_negate_ra_state CFIs are added to the places where the
+// state changes in InsertNegateRAStatePass.
+//
+//===----------------------------------------------------------------------===//
+#include "bolt/Passes/MarkRAStates.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+
+#include <iostream>
+#include <optional>
+#include <stack>
+
+using namespace llvm;
+
+namespace llvm {
+namespace bolt {
+
+void MarkRAStates::runOnFunction(BinaryFunction &BF) {
+
+  if (BF.isIgnored())
+    return;
+
+  BinaryContext &BC = BF.getBinaryContext();
+
+  for (BinaryBasicBlock &BB : BF) {
+    for (auto It = BB.begin(); It != BB.end(); ++It) {
+      MCInst &Inst = *It;
+      if ((BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) &&
+          !BC.MIB->hasNegateRAState(Inst)) {
+        // no .cfi_negate_ra_state attached to signing or authenticating instr
+        // means, that this is a function with handwritten assembly, which might
+        // not respect Clang's conventions (e.g. tailcalls are always
+        // authenticated, so functions always start with unsigned RAState when
+        // working with compiler-generated code)
+        BF.setIgnored();
+        BC.outs() << "BOLT-INFO: ignoring RAStates in function "
+                  << BF.getPrintName() << "\n";
+        return;
+      }
+    }
+  }
+
+  bool RAState = false;
+  std::stack<bool> RAStateStack;
+
+  for (BinaryBasicBlock &BB : BF) {
+    for (auto It = BB.begin(); It != BB.end(); ++It) {
+
+      MCInst &Inst = *It;
+      if (BC.MIB->isCFI(Inst))
+        continue;
+
+      if (BC.MIB->isPSign(Inst)) {
+        assert(!RAState && "Signed RA State before PSign");
+        BC.MIB->setRASigning(Inst);
+
+      } else if (BC.MIB->isPAuth(Inst)) {
+        assert(RAState && "Unsigned RA State before PAuth");
+        BC.MIB->setAuthenticating(Inst);
+      } else if (RAState) {
+        BC.MIB->setRASigned(Inst);
+      } else {
+        BC.MIB->setRAUnsigned(Inst);
+      }
+
+      // Updating RAState. All updates are valid from the next instruction.
+      // Because the same instruction can have remember and restore, the order
+      // here is relevant. This is the reason to loop over Annotations instead
+      // of just checking each in a predefined order.
+      for (unsigned int Idx = 0; Idx < Inst.getNumOperands(); Idx++) {
+        std::optional<int64_t> Annotation =
+            BC.MIB->getAnnotationAtOpIndex(Inst, Idx);
+        if (!Annotation)
+          continue;
+        if (Annotation == MCPlus::MCAnnotation::kNegateState)
+          RAState = !RAState;
+        if (Annotation == MCPlus::MCAnnotation::kRememberState)
+          RAStateStack.push(RAState);
+        if (Annotation == MCPlus::MCAnnotation::kRestoreState) {
+          RAState = RAStateStack.top();
+          RAStateStack.pop();
+        }
+      }
+    }
+  }
+}
+
+Error MarkRAStates::runOnFunctions(BinaryContext &BC) {
+  ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
+    runOnFunction(BF);
+  };
+
+  ParallelUtilities::runOnEachFunction(
+      BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr,
+      "MarkRAStates");
+
+  return Error::success();
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index dd48653931eb9..b69105ec008f1 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -19,11 +19,13 @@
 #include "bolt/Passes/IdenticalCodeFolding.h"
 #include "bolt/Passes/IndirectCallPromotion.h"
 #include "bolt/Passes/Inliner.h"
+#include "bolt/Passes/InsertNegateRAStatePass.h"
 #include "bolt/Passes/Instrumentation.h"
 #include "bolt/Passes/JTFootprintReduction.h"
 #include "bolt/Passes/LongJmp.h"
 #include "bolt/Passes/LoopInversionPass.h"
 #include "bolt/Passes/MCF.h"
+#include "bolt/Passes/MarkRAStates.h"
 #include "bolt/Passes/PLTCall.h"
 #include "bolt/Passes/PatchEntries.h"
 #include "bolt/Passes/ProfileQualityStats.h"
@@ -350,6 +352,8 @@ Error BinaryFunctionPassManager::runPasses() {
 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
   BinaryFunctionPassManager Manager(BC);
 
+  Manager.registerPass(std::make_unique<MarkRAStates>());
+
   Manager.registerPass(
       std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts));
 
@@ -505,6 +509,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
     // targets. No extra instructions after this pass, otherwise we may have
     // relocations out of range and crash during linking.
     Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
+
+    Manager.registerPass(std::make_unique<InsertNegateRAState>());
   }
 
   // This pass should always run last.*

>From e3de43c17a1a3823b240f30754ec2b001ffbb110 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 16 Dec 2024 12:19:57 +0100
Subject: [PATCH 03/15] [BOLT] only run MarkRAStates pass on AArch64

---
 bolt/lib/Rewrite/BinaryPassManager.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index b69105ec008f1..ac50168ff67a8 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -352,7 +352,8 @@ Error BinaryFunctionPassManager::runPasses() {
 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
   BinaryFunctionPassManager Manager(BC);
 
-  Manager.registerPass(std::make_unique<MarkRAStates>());
+  if (BC.isAArch64())
+    Manager.registerPass(std::make_unique<MarkRAStates>());
 
   Manager.registerPass(
       std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts));

>From 0028e18053375be741ce3cfbf3af1e0f18bb02de Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 16 Dec 2024 13:08:34 +0100
Subject: [PATCH 04/15] [BOLT] InsertNegateRAStatePass: remove print

---
 bolt/lib/Passes/InsertNegateRAStatePass.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index b0945e4065f03..ecdd33fa36da6 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -39,10 +39,8 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
   }
 
   // If none is inserted, the function doesn't need more work.
-  if (!addNegateRAStateAfterPacOrAuth(BF)) {
-    BC.outs() << "BOLT-INFO: no pacret found.\n";
+  if (!addNegateRAStateAfterPacOrAuth(BF))
     return;
-  }
 
   fixUnknownStates(BF);
 

>From 3a5aa7de79c272f36cfd5bacd7458655f5b1f372 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Fri, 10 Jan 2025 13:44:45 +0100
Subject: [PATCH 05/15] [BOLT] Remove unused MCAnnotation

---
 bolt/include/bolt/Core/MCPlus.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h
index 78387ad8f5b98..6ff3c580da0d4 100644
--- a/bolt/include/bolt/Core/MCPlus.h
+++ b/bolt/include/bolt/Core/MCPlus.h
@@ -72,7 +72,6 @@ class MCAnnotation {
     kLabel,               /// MCSymbol pointing to this instruction.
     kSize,                /// Size of the instruction.
     kDynamicBranch,       /// Jit instruction patched at runtime.
-    kUnkownSign,          /// Signed state not determined yet
     kSigning,             /// Inst is a signing instruction (paciasp, etc.)
     kSigned,              /// Inst is in a range where RA is signed
     kAuthenticating,      /// Authenticating inst (e.g. autiasp)

>From aaa9e3c784d1ab58c88b51d78ebbdc71df2adc27 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 15 Jan 2025 12:50:38 +0100
Subject: [PATCH 06/15] [BOLT] Add changes from code review (#120064)

---
 bolt/include/bolt/Core/BinaryFunction.h       |  5 ++-
 bolt/include/bolt/Core/MCPlusBuilder.h        | 30 ++++++++--------
 .../bolt/Passes/InsertNegateRAStatePass.h     | 19 +++++++++++
 bolt/include/bolt/Passes/MarkRAStates.h       | 11 ++++++
 bolt/lib/Core/Exceptions.cpp                  | 16 ++++-----
 bolt/lib/Core/MCPlusBuilder.cpp               | 34 +++++--------------
 bolt/lib/Passes/InsertNegateRAStatePass.cpp   | 15 ++------
 bolt/lib/Passes/MarkRAStates.cpp              |  6 ++--
 8 files changed, 70 insertions(+), 66 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a11fd2dc8976e..97bcde39e5e5f 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -1633,9 +1633,8 @@ class BinaryFunction {
       Offset = I->first;
     }
     assert(I->first == Offset && "CFI pointing to unknown instruction");
-    if (I == Instructions.begin()) {
+    if (I == Instructions.begin())
       return {};
-    }
 
     --I;
     while (I != Instructions.begin() && BC.MIB->isNoop(I->second)) {
@@ -1652,7 +1651,7 @@ class BinaryFunction {
       I--;
 
       switch (CFIOpcode) {
-      case dwarf::DW_CFA_GNU_window_save:
+      case dwarf::DW_CFA_AARCH64_negate_ra_state:
         BC.MIB->setNegateRAState(I->second);
         break;
       case dwarf::DW_CFA_remember_state:
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index a2bb6be0b8346..f4960bf53ee02 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1221,49 +1221,49 @@ class MCPlusBuilder {
   /// Return true if the instruction is a tail call.
   bool isTailCall(const MCInst &Inst) const;
 
-  /// Stores NegateRAState annotation on Inst.
+  /// Stores NegateRAState annotation on \p Inst.
   void setNegateRAState(MCInst &Inst) const;
 
-  /// Return true if Inst has NegateRAState annotation.
+  /// Return true if \p Inst has NegateRAState annotation.
   bool hasNegateRAState(const MCInst &Inst) const;
 
-  /// Sets RememberState annotation on Inst.
+  /// Sets RememberState annotation on \p Inst.
   void setRememberState(MCInst &Inst) const;
 
-  /// Return true if Inst has RememberState annotation.
+  /// Return true if \p Inst has RememberState annotation.
   bool hasRememberState(const MCInst &Inst) const;
 
-  /// Stores RestoreState annotation on Inst.
+  /// Stores RestoreState annotation on \p Inst.
   void setRestoreState(MCInst &Inst) const;
 
-  /// Return true if Inst has RestoreState annotation.
+  /// Return true if \p Inst has RestoreState annotation.
   bool hasRestoreState(const MCInst &Inst) const;
 
-  /// Stores RA Signed annotation on Inst.
+  /// Stores RA Signed annotation on \p Inst.
   void setRASigned(MCInst &Inst) const;
 
-  /// Return true if Inst has Signed RA annotation.
+  /// Return true if \p Inst has Signed RA annotation.
   bool isRASigned(const MCInst &Inst) const;
 
-  /// Stores RA Signing annotation on Inst.
+  /// Stores RA Signing annotation on \p Inst.
   void setRASigning(MCInst &Inst) const;
 
-  /// Return true if Inst has Signing RA annotation.
+  /// Return true if \p Inst has Signing RA annotation.
   bool isRASigning(const MCInst &Inst) const;
 
-  /// Stores Authenticating annotation on Inst.
+  /// Stores Authenticating annotation on \p Inst.
   void setAuthenticating(MCInst &Inst) const;
 
-  /// Return true if Inst has Authenticating annotation.
+  /// Return true if \p Inst has Authenticating annotation.
   bool isAuthenticating(const MCInst &Inst) const;
 
-  /// Stores RA Unsigned annotation on Inst.
+  /// Stores RA Unsigned annotation on \p Inst.
   void setRAUnsigned(MCInst &Inst) const;
 
-  /// Return true if Inst has Unsigned RA annotation.
+  /// Return true if \p Inst has Unsigned RA annotation.
   bool isRAUnsigned(const MCInst &Inst) const;
 
-  /// Return true if Inst doesn't have any annotation related to RA state.
+  /// Return true if \p Inst doesn't have any annotation related to RA state.
   bool isRAStateUnknown(const MCInst &Inst) const;
 
   /// Return true if the instruction is a call with an exception handling info.
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index 8cf08add9402a..e62006baa2eff 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -1,3 +1,14 @@
+//===- bolt/Passes/InsertNegateRAStatePass.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the InsertNegateRAState class.
+//
+//===----------------------------------------------------------------------===//
 #ifndef BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS
 #define BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS
 
@@ -16,7 +27,15 @@ class InsertNegateRAState : public BinaryFunctionPass {
   /// Pass entry point
   Error runOnFunctions(BinaryContext &BC) override;
   void runOnFunction(BinaryFunction &BF);
+
+private:
+  /// Loops over all instructions and adds OpNegateRAState CFI
+  /// after any pointer signing or authenticating instructions.
+  /// Returns true, if any OpNegateRAState CFIs were added.
   bool addNegateRAStateAfterPacOrAuth(BinaryFunction &BF);
+  /// Because states are tracked as MCAnnotations on individual instructions,
+  /// newly inserted instructions do not have a state associated with them.
+  /// New states are "inherited" from the last known state.
   void fixUnknownStates(BinaryFunction &BF);
 };
 
diff --git a/bolt/include/bolt/Passes/MarkRAStates.h b/bolt/include/bolt/Passes/MarkRAStates.h
index 3cbd6044683da..e7a49f813b6a7 100644
--- a/bolt/include/bolt/Passes/MarkRAStates.h
+++ b/bolt/include/bolt/Passes/MarkRAStates.h
@@ -1,3 +1,14 @@
+//===- bolt/Passes/MarkRAStates.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MarkRAStates class.
+//
+//===----------------------------------------------------------------------===//
 #ifndef BOLT_PASSES_MARK_RA_STATES
 #define BOLT_PASSES_MARK_RA_STATES
 
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index d38b37bcb3c61..63b7ad43b1dec 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -640,16 +640,16 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
         BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
       return false;
     case DW_CFA_GNU_window_save:
-      // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
-      // id but mean different things. The latter is used in AArch64.
+      // DW_CFA_GNU_window_save and DW_CFA_AARCH64_negate_ra_state just use the
+      // same id but mean different things. The latter is used in AArch64.
       if (Function.getBinaryContext().isAArch64()) {
-        // Not adding OpNegateRAState since the location they are needed
+        // The location where OpNegateRAState CFIs are needed
         // depends on the order of BasicBlocks, which changes during
-        // optimizations. Instead, an annotation is added to the instruction, to
-        // mark that the instruction modifies the RA State. The actual state for
-        // instructions are worked out in MarkRAStates based on these
-        // annotations.
-        Function.setInstModifiesRAState(DW_CFA_GNU_window_save, Offset);
+        // optimizations. Instead of adding OpNegateRAState CFIs, an annotation
+        // is added to the instruction, to mark that the instruction modifies
+        // the RA State. The actual state of each instruction is worked out in
+        // MarkRAStates based on these annotations.
+        Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state, Offset);
         break;
       }
       if (opts::Verbosity >= 1)
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index cd7b8bf0efa75..835a85ad54f37 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -153,9 +153,7 @@ void MCPlusBuilder::setNegateRAState(MCInst &Inst) const {
 }
 
 bool MCPlusBuilder::hasNegateRAState(const MCInst &Inst) const {
-  if (hasAnnotation(Inst, MCAnnotation::kNegateState))
-    return true;
-  return false;
+  return hasAnnotation(Inst, MCAnnotation::kNegateState);
 }
 
 void MCPlusBuilder::setRememberState(MCInst &Inst) const {
@@ -164,9 +162,7 @@ void MCPlusBuilder::setRememberState(MCInst &Inst) const {
 }
 
 bool MCPlusBuilder::hasRememberState(const MCInst &Inst) const {
-  if (hasAnnotation(Inst, MCAnnotation::kRememberState))
-    return true;
-  return false;
+  return hasAnnotation(Inst, MCAnnotation::kRememberState);
 }
 
 void MCPlusBuilder::setRestoreState(MCInst &Inst) const {
@@ -175,9 +171,7 @@ void MCPlusBuilder::setRestoreState(MCInst &Inst) const {
 }
 
 bool MCPlusBuilder::hasRestoreState(const MCInst &Inst) const {
-  if (hasAnnotation(Inst, MCAnnotation::kRestoreState))
-    return true;
-  return false;
+  return hasAnnotation(Inst, MCAnnotation::kRestoreState);
 }
 
 void MCPlusBuilder::setRASigned(MCInst &Inst) const {
@@ -186,9 +180,7 @@ void MCPlusBuilder::setRASigned(MCInst &Inst) const {
 }
 
 bool MCPlusBuilder::isRASigned(const MCInst &Inst) const {
-  if (hasAnnotation(Inst, MCAnnotation::kSigned))
-    return true;
-  return false;
+  return hasAnnotation(Inst, MCAnnotation::kSigned);
 }
 
 void MCPlusBuilder::setRASigning(MCInst &Inst) const {
@@ -197,9 +189,7 @@ void MCPlusBuilder::setRASigning(MCInst &Inst) const {
 }
 
 bool MCPlusBuilder::isRASigning(const MCInst &Inst) const {
-  if (hasAnnotation(Inst, MCAnnotation::kSigning))
-    return true;
-  return false;
+  return hasAnnotation(Inst, MCAnnotation::kSigning);
 }
 
 void MCPlusBuilder::setAuthenticating(MCInst &Inst) const {
@@ -208,9 +198,7 @@ void MCPlusBuilder::setAuthenticating(MCInst &Inst) const {
 }
 
 bool MCPlusBuilder::isAuthenticating(const MCInst &Inst) const {
-  if (hasAnnotation(Inst, MCAnnotation::kAuthenticating))
-    return true;
-  return false;
+  return hasAnnotation(Inst, MCAnnotation::kAuthenticating);
 }
 
 void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const {
@@ -219,16 +207,12 @@ void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const {
 }
 
 bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const {
-  if (hasAnnotation(Inst, MCAnnotation::kUnsigned))
-    return true;
-  return false;
+  return hasAnnotation(Inst, MCAnnotation::kUnsigned);
 }
 
 bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const {
-  if (hasAnnotation(Inst, MCAnnotation::kUnsigned) ||
-      hasAnnotation(Inst, MCAnnotation::kSigned) ||
-      hasAnnotation(Inst, MCAnnotation::kSigning) ||
-      hasAnnotation(Inst, MCAnnotation::kAuthenticating))
+  if (isRAUnsigned(Inst) || isRASigned(Inst) || isRASigning(Inst) ||
+      isAuthenticating(Inst))
     return false;
   return true;
 }
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index ecdd33fa36da6..62e363a8bcbde 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -27,9 +27,8 @@ namespace bolt {
 void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
   BinaryContext &BC = BF.getBinaryContext();
 
-  if (BF.getState() == BinaryFunction::State::Empty) {
+  if (BF.getState() == BinaryFunction::State::Empty)
     return;
-  }
 
   if (BF.getState() != BinaryFunction::State::CFG &&
       BF.getState() != BinaryFunction::State::CFG_Finalized) {
@@ -91,13 +90,7 @@ bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) {
   for (BinaryBasicBlock &BB : BF) {
     for (auto Iter = BB.begin(); Iter != BB.end(); ++Iter) {
       MCInst &Inst = *Iter;
-      if (BC.MIB->isPSign(Inst)) {
-        Iter = BF.addCFIInstruction(
-            &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr));
-        FoundAny = true;
-      }
-
-      if (BC.MIB->isPAuth(Inst)) {
+      if (BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) {
         Iter = BF.addCFIInstruction(
             &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr));
         FoundAny = true;
@@ -111,9 +104,7 @@ void InsertNegateRAState::fixUnknownStates(BinaryFunction &BF) {
   BinaryContext &BC = BF.getBinaryContext();
   bool FirstIter = true;
   MCInst PrevInst;
-  for (auto BBIt = BF.begin(); BBIt != BF.end(); ++BBIt) {
-    BinaryBasicBlock &BB = *BBIt;
-
+  for (BinaryBasicBlock &BB : BF) {
     for (auto It = BB.begin(); It != BB.end(); ++It) {
 
       MCInst &Inst = *It;
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index 03affab2f2154..69d68e46a2d46 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -11,7 +11,7 @@
 // - NegateRAState flips the RA State,
 // - RememberState pushes the RA State to a stack,
 // - RestoreState pops the RA State from the stack.
-// These were saved as MCAnnotations on instructions they refer to at CFI
+// These are saved as MCAnnotations on instructions they refer to at CFI
 // reading (in CFIReaderWriter::fillCFIInfoFor). In this pass, we can work out
 // the RA State of each instruction, and save it as new MCAnnotations. The new
 // annotations are Signing, Signed, Authenticating and Unsigned. After
@@ -95,9 +95,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
           continue;
         if (Annotation == MCPlus::MCAnnotation::kNegateState)
           RAState = !RAState;
-        if (Annotation == MCPlus::MCAnnotation::kRememberState)
+        else if (Annotation == MCPlus::MCAnnotation::kRememberState)
           RAStateStack.push(RAState);
-        if (Annotation == MCPlus::MCAnnotation::kRestoreState) {
+        else if (Annotation == MCPlus::MCAnnotation::kRestoreState) {
           RAState = RAStateStack.top();
           RAStateStack.pop();
         }

>From c79418bebc30c6c7b9ea8504e18e0fc1000553f8 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 22 Jan 2025 09:57:31 +0100
Subject: [PATCH 07/15] [BOLT] Fix more review nits (#120064)

---
 bolt/include/bolt/Core/MCPlus.h  | 14 +++++++-------
 bolt/lib/Core/MCPlusBuilder.cpp  |  6 ++----
 bolt/lib/Passes/MarkRAStates.cpp |  1 -
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h
index 6ff3c580da0d4..a95bba36c5a6e 100644
--- a/bolt/include/bolt/Core/MCPlus.h
+++ b/bolt/include/bolt/Core/MCPlus.h
@@ -72,13 +72,13 @@ class MCAnnotation {
     kLabel,               /// MCSymbol pointing to this instruction.
     kSize,                /// Size of the instruction.
     kDynamicBranch,       /// Jit instruction patched at runtime.
-    kSigning,             /// Inst is a signing instruction (paciasp, etc.)
-    kSigned,              /// Inst is in a range where RA is signed
-    kAuthenticating,      /// Authenticating inst (e.g. autiasp)
-    kUnsigned,            /// Inst is in a range where RA is unsigned
-    kRememberState,       /// Inst has rememberState CFI
-    kRestoreState,        /// Inst has restoreState CFI
-    kNegateState,         /// Inst has OpNegateRAState CFI
+    kSigning,             /// Inst is a signing instruction (paciasp, etc.).
+    kSigned,              /// Inst is in a range where RA is signed.
+    kAuthenticating,      /// Authenticating inst (e.g. autiasp).
+    kUnsigned,            /// Inst is in a range where RA is unsigned.
+    kRememberState,       /// Inst has rememberState CFI.
+    kRestoreState,        /// Inst has restoreState CFI.
+    kNegateState,         /// Inst has OpNegateRAState CFI.
     kGeneric,             /// First generic annotation.
   };
 
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index 835a85ad54f37..4b23f1056c511 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -211,10 +211,8 @@ bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const {
 }
 
 bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const {
-  if (isRAUnsigned(Inst) || isRASigned(Inst) || isRASigning(Inst) ||
-      isAuthenticating(Inst))
-    return false;
-  return true;
+  return !(isRAUnsigned(Inst) || isRASigned(Inst) || isRASigning(Inst) ||
+           isAuthenticating(Inst));
 }
 
 std::optional<MCLandingPad> MCPlusBuilder::getEHInfo(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index 69d68e46a2d46..4d1f126faf074 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -74,7 +74,6 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
       if (BC.MIB->isPSign(Inst)) {
         assert(!RAState && "Signed RA State before PSign");
         BC.MIB->setRASigning(Inst);
-
       } else if (BC.MIB->isPAuth(Inst)) {
         assert(RAState && "Unsigned RA State before PAuth");
         BC.MIB->setAuthenticating(Inst);

>From 477a6f24e3f9d1f805efc375df0019bd2835ea0e Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Thu, 6 Feb 2025 15:58:48 +0100
Subject: [PATCH 08/15] [BOLT] remove asserts from MarkRAStates

    instead of failing on asserts, call setIgnored() on functions
    with inconsistent input RA States
---
 bolt/lib/Passes/MarkRAStates.cpp | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index 4d1f126faf074..adccf2090c36f 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -54,7 +54,7 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
         // authenticated, so functions always start with unsigned RAState when
         // working with compiler-generated code)
         BF.setIgnored();
-        BC.outs() << "BOLT-INFO: ignoring RAStates in function "
+        BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                   << BF.getPrintName() << "\n";
         return;
       }
@@ -72,10 +72,22 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
         continue;
 
       if (BC.MIB->isPSign(Inst)) {
-        assert(!RAState && "Signed RA State before PSign");
+        if (RAState) {
+          // RA signing instructions should only follow unsigned RA state.
+          BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+                    << BF.getPrintName() << "\n";
+          BF.setIgnored();
+          return;
+        }
         BC.MIB->setRASigning(Inst);
       } else if (BC.MIB->isPAuth(Inst)) {
-        assert(RAState && "Unsigned RA State before PAuth");
+        if (!RAState) {
+          // RA authenticating instructions should only follow signed RA state.
+          BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+                    << BF.getPrintName() << "\n";
+          BF.setIgnored();
+          return;
+        }
         BC.MIB->setAuthenticating(Inst);
       } else if (RAState) {
         BC.MIB->setRASigned(Inst);

>From c136aa1200b91db47b05ac37b385bdbdcbc40677 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Sat, 8 Mar 2025 11:45:38 +0100
Subject: [PATCH 09/15] [BOLT] Bug fix: remove usage of outdated reference

---
 bolt/lib/Passes/InsertNegateRAStatePass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 62e363a8bcbde..098ebf8f953b4 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -78,7 +78,7 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
       } else {
         FirstIter = false;
       }
-      PrevInst = Inst;
+      PrevInst = *It;
     }
     PrevBB = *BB;
   }

>From 1f34fa360a8b73e5dc331599fc499d05735ccc5d Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 7 Apr 2025 18:19:20 +0200
Subject: [PATCH 10/15] [BOLT] Add all PSign/PAuth variants

---
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index dcbd82b479549..c3d48d3699ede 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -677,10 +677,28 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return false;
   }
   bool isPAuth(MCInst &Inst) const override {
-    return Inst.getOpcode() == AArch64::AUTIASP;
+    return Inst.getOpcode() == AArch64::AUTIA ||
+           Inst.getOpcode() == AArch64::AUTIB ||
+           Inst.getOpcode() == AArch64::AUTIA1716 ||
+           Inst.getOpcode() == AArch64::AUTIB1716 ||
+           Inst.getOpcode() == AArch64::AUTIASP ||
+           Inst.getOpcode() == AArch64::AUTIBSP ||
+           Inst.getOpcode() == AArch64::AUTIAZ ||
+           Inst.getOpcode() == AArch64::AUTIBZ ||
+           Inst.getOpcode() == AArch64::AUTIZA ||
+           Inst.getOpcode() == AArch64::AUTIZB;
   }
   bool isPSign(MCInst &Inst) const override {
-    return Inst.getOpcode() == AArch64::PACIASP;
+    return Inst.getOpcode() == AArch64::PACIA ||
+           Inst.getOpcode() == AArch64::PACIB ||
+           Inst.getOpcode() == AArch64::PACIA1716 ||
+           Inst.getOpcode() == AArch64::PACIB1716 ||
+           Inst.getOpcode() == AArch64::PACIASP ||
+           Inst.getOpcode() == AArch64::PACIBSP ||
+           Inst.getOpcode() == AArch64::PACIAZ ||
+           Inst.getOpcode() == AArch64::PACIBZ ||
+           Inst.getOpcode() == AArch64::PACIZA ||
+           Inst.getOpcode() == AArch64::PACIZB;
   }
 
   bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From,

>From e42844b63c7dfca3f1c2850eeefaad36e1332a67 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Fri, 4 Apr 2025 11:23:22 +0200
Subject: [PATCH 11/15] [BOLT] Add unit tests for negate_ra_state cfi handling

- also add match_dwarf.py, a tool used by the unit tests.
---
 bolt/test/AArch64/incorrect-negate-ra-state.s |  41 +++++++
 bolt/test/AArch64/simple-negate-ra-state.s    |  42 +++++++
 bolt/test/lit.cfg.py                          |   7 ++
 bolt/test/match_dwarf.py                      | 114 ++++++++++++++++++
 4 files changed, 204 insertions(+)
 create mode 100644 bolt/test/AArch64/incorrect-negate-ra-state.s
 create mode 100644 bolt/test/AArch64/simple-negate-ra-state.s
 create mode 100755 bolt/test/match_dwarf.py

diff --git a/bolt/test/AArch64/incorrect-negate-ra-state.s b/bolt/test/AArch64/incorrect-negate-ra-state.s
new file mode 100644
index 0000000000000..790c090d5a670
--- /dev/null
+++ b/bolt/test/AArch64/incorrect-negate-ra-state.s
@@ -0,0 +1,41 @@
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags  %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.exe.bolt | FileCheck %s
+# check that the output is listing foo as incorrect
+# CHECK: BOLT-INFO: inconsistent RAStates in function foo
+
+# check that foo got Ignored, so it's not in the new .text section
+# RUN: llvm-objdump %t.exe.bolt -d -j .text > %t.exe.dump
+# RUN: not grep "<foo>:" %t.exe.dump
+
+
+  .text
+  .globl  foo
+  .p2align        2
+  .type   foo, at function
+foo:
+  .cfi_startproc
+  hint    #25
+  .cfi_negate_ra_state
+  sub     sp, sp, #16
+  stp     x29, x30, [sp, #16]             // 16-byte Folded Spill
+  .cfi_def_cfa_offset 16
+  str     w0, [sp, #12]
+  ldr     w8, [sp, #12]
+  .cfi_negate_ra_state
+  add     w0, w8, #1
+  ldp     x29, x30, [sp, #16]             // 16-byte Folded Reload
+  add     sp, sp, #16
+  hint    #29
+  .cfi_negate_ra_state
+  ret
+.Lfunc_end1:
+  .size   foo, .Lfunc_end1-foo
+  .cfi_endproc
+
+  .global _start
+  .type _start, %function
+_start:
+  b foo
+
+.reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/simple-negate-ra-state.s b/bolt/test/AArch64/simple-negate-ra-state.s
new file mode 100644
index 0000000000000..2db0ffe20112e
--- /dev/null
+++ b/bolt/test/AArch64/simple-negate-ra-state.s
@@ -0,0 +1,42 @@
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags  %t.o -o %t.exe -Wl,-q
+
+# RUN: llvm-objdump %t.exe -d > %t.exe.dump
+# RUN: llvm-objdump --dwarf=frames %t.exe -D > %t.exe.dump-dwarf
+# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > orig.txt
+
+# RUN: llvm-bolt %t.exe -o %t.exe.bolt
+
+# RUN: llvm-objdump %t.exe.bolt -d > %t.exe.bolt.dump
+# RUN: llvm-objdump --dwarf=frames %t.exe.bolt  > %t.exe.bolt.dump-dwarf
+# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > bolted.txt
+
+# RUN: diff orig.txt bolted.txt
+
+	.text
+	.globl	foo
+	.p2align	2
+	.type	foo, at function
+foo:
+	.cfi_startproc
+	hint	#25
+	.cfi_negate_ra_state
+	sub	sp, sp, #16
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	.cfi_def_cfa_offset 16
+	str	w0, [sp, #12]
+	ldr	w8, [sp, #12]
+	add	w0, w8, #1
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #16
+	hint	#29
+	.cfi_negate_ra_state
+	ret
+.Lfunc_end1:
+	.size	foo, .Lfunc_end1-foo
+	.cfi_endproc
+
+	.global _start
+	.type _start, %function
+_start:
+	b foo
diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py
index 0d05229be2bf3..60617cd76f968 100644
--- a/bolt/test/lit.cfg.py
+++ b/bolt/test/lit.cfg.py
@@ -89,6 +89,7 @@
 config.substitutions.append(("%cxxflags", ""))
 
 link_fdata_cmd = os.path.join(config.test_source_root, "link_fdata.py")
+match_dwarf_cmd = os.path.join(config.test_source_root, "match_dwarf.py")
 
 tool_dirs = [config.llvm_tools_dir, config.test_source_root]
 
@@ -131,6 +132,12 @@
     ToolSubst("llvm-readobj", unresolved="fatal"),
     ToolSubst("llvm-dwp", unresolved="fatal"),
     ToolSubst("split-file", unresolved="fatal"),
+    ToolSubst(
+        "match-dwarf",
+        command=sys.executable,
+        unresolved="fatal",
+        extra_args=[match_dwarf_cmd],
+    ),
 ]
 llvm_config.add_tool_substitutions(tools, tool_dirs)
 
diff --git a/bolt/test/match_dwarf.py b/bolt/test/match_dwarf.py
new file mode 100755
index 0000000000000..395d2fdc19016
--- /dev/null
+++ b/bolt/test/match_dwarf.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+
+# This tool helps to match dwarf dumps
+# (= the output from running llvm-objdump --dwarf=frames),
+# by address to function names (which are parsed from a normal objdump).
+# The script is used for checking if .cfi_negate_ra_state CFIs
+# are generated by BOLT the same way they are generated by LLVM.
+
+import argparse
+import subprocess
+import sys
+import re
+
+
+class NameDwarfPair(object):
+    def __init__(self, name, body):
+        self.name = name
+        self.body = body
+        self.finalized = False
+
+    def append(self, body_line):
+        # only store elements into the body until the first whitespace line is encountered.
+        if body_line.isspace():
+            self.finalized = True
+        if not self.finalized:
+            self.body += body_line
+
+    def print(self):
+        print(self.name)
+        print(self.body)
+
+    def parse_negates(self):
+        negate_offsets = []
+        loc = 0
+        # TODO: make sure this is not printed in hex
+        re_advloc = f"DW_CFA_advance_loc: (\d+)"
+
+        for line in self.body.splitlines():
+            # if line matches advance_loc int
+            match = re.search(re_advloc, line)
+            if match:
+                loc += int(match.group(1))
+            if "DW_CFA_AARCH64_negate_ra_state" in line:
+                negate_offsets.append(loc)
+
+        self.negate_offsets = negate_offsets
+
+    def __eq__(self, other):
+        return self.name == other.name and self.negate_offsets == other.negate_offsets
+
+
+def parse_objdump(objdump):
+    """
+    Parse and return address-to-name dictionary from objdump file
+    """
+    addr_name_dict = dict()
+    re_function = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$")
+    with open(objdump, "r") as f:
+        for line in f.readlines():
+            match = re_function.match(line)
+            if not match:
+                continue
+            m_addr = match.groups()[0]
+            m_name = match.groups()[1]
+            addr_name_dict[int(m_addr, 16)] = m_name
+
+    return addr_name_dict
+
+
+def parse_dwarf(dwarfdump, addr_name_dict):
+    """
+    Parse dwarf dump, and match names to blocks using the dict from the objdump.
+    Return a list of NameDwarfPairs.
+    """
+    re_address_line = re.compile(r".*pc=([0-9a-fA-F]{8})\.\.\.([0-9a-fA-F]{8})")
+    with open(dwarfdump, "r") as dw:
+        functions = []
+        for line in dw.readlines():
+            match = re_address_line.match(line)
+            if not match:
+                if len(functions) > 0:
+                    functions[-1].append(line)
+                continue
+            pc_start_address = match.groups()[0]
+            name = addr_name_dict.get(int(pc_start_address, 16))
+            functions.append(NameDwarfPair(name, ""))
+
+        return functions
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("objdump", help="Objdump file")
+    parser.add_argument(
+        "dwarfdump", help="dwarf dump file created with 'llvm-objdump --dwarf=frames'"
+    )
+    parser.add_argument("function", help="Function to search CFIs in.")
+
+    args = parser.parse_args()
+
+    addr_name_dict = parse_objdump(args.objdump)
+    functions = parse_dwarf(args.dwarfdump, addr_name_dict)
+
+    for f in functions:
+        if f.name == args.function:
+            f.parse_negates()
+            print(f.negate_offsets)
+            break
+    else:
+        print(f"{args.function} not found")
+        exit(-1)
+
+
+main()

>From 21f356ab26467a2073d4b231ca2b430035520a14 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 30 Apr 2025 09:59:34 +0200
Subject: [PATCH 12/15] [BOLT][AArch64] Fix which PSign and PAuth variants are
 used (#120064)

- only the ones operating on LR should be marked
    with .cfi_negate_ra_state
- added support for fused PtrAuth and Ret instructions,
    e.g. RETAA.
---
 bolt/include/bolt/Core/MCPlusBuilder.h        | 26 ++++--
 .../bolt/Passes/InsertNegateRAStatePass.h     |  4 +-
 bolt/lib/Passes/InsertNegateRAStatePass.cpp   | 13 ++-
 bolt/lib/Passes/MarkRAStates.cpp              |  7 +-
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 79 +++++++++++++------
 5 files changed, 90 insertions(+), 39 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f4960bf53ee02..14a327d3c4fb4 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -575,11 +575,30 @@ class MCPlusBuilder {
     return {};
   }
 
+  virtual ErrorOr<MCPhysReg> getSignedReg(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return getNoRegister();
+  }
   virtual ErrorOr<MCPhysReg> getAuthenticatedReg(const MCInst &Inst) const {
     llvm_unreachable("not implemented");
     return getNoRegister();
   }
 
+  virtual bool isPSignOnLR(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
+  virtual bool isPAuthOnLR(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
+  virtual bool isPAuthAndRet(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   virtual bool isAuthenticationOfReg(const MCInst &Inst,
                                      MCPhysReg AuthenticatedReg) const {
     llvm_unreachable("not implemented");
@@ -740,13 +759,6 @@ class MCPlusBuilder {
     llvm_unreachable("not implemented");
     return false;
   }
-  virtual bool isPAuth(MCInst &Inst) const {
-    llvm_unreachable("not implemented");
-  }
-
-  virtual bool isPSign(MCInst &Inst) const {
-    llvm_unreachable("not implemented");
-  }
 
   virtual bool isCleanRegXOR(const MCInst &Inst) const {
     llvm_unreachable("not implemented");
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index e62006baa2eff..ce73b5a152d12 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -30,7 +30,9 @@ class InsertNegateRAState : public BinaryFunctionPass {
 
 private:
   /// Loops over all instructions and adds OpNegateRAState CFI
-  /// after any pointer signing or authenticating instructions.
+  /// after any pointer signing or authenticating instructions
+  /// that operate on the LR, except fused ptrauth + ret instructions
+  /// (such as RETAA).
   /// Returns true, if any OpNegateRAState CFIs were added.
   bool addNegateRAStateAfterPacOrAuth(BinaryFunction &BF);
   /// Because states are tracked as MCAnnotations on individual instructions,
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
index 098ebf8f953b4..05d4d58c7e4cf 100644
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
@@ -46,14 +46,16 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
   bool FirstIter = true;
   MCInst PrevInst;
   BinaryBasicBlock *PrevBB = nullptr;
+  // We need to iterate on BBs in the Layout order
+  // not in the order they are stored in the BF class.
   auto *Begin = BF.getLayout().block_begin();
   auto *End = BF.getLayout().block_end();
   for (auto *BB = Begin; BB != End; BB++) {
 
     // Support for function splitting:
-    // if two consecutive BBs are going to end up in different functions,
-    // we have to negate the RA State, so the new function starts with a Signed
-    // state.
+    // if two consecutive BBs with Signed state are going to end up in different
+    // functions, we have to add an OpNegateRAState to the beginning of the
+    // newly split function, so it starts with a Signed state.
     if (PrevBB != nullptr &&
         PrevBB->getFragmentNum() != (*BB)->getFragmentNum() &&
         BC.MIB->isRASigned(*((*BB)->begin()))) {
@@ -68,6 +70,8 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
         continue;
 
       if (!FirstIter) {
+        // Consecutive instructions with different RAState mean we need to add
+        // an OpNegateRAState.
         if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) ||
             (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) {
 
@@ -90,7 +94,8 @@ bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) {
   for (BinaryBasicBlock &BB : BF) {
     for (auto Iter = BB.begin(); Iter != BB.end(); ++Iter) {
       MCInst &Inst = *Iter;
-      if (BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) {
+      if (BC.MIB->isPSignOnLR(Inst) ||
+          (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) {
         Iter = BF.addCFIInstruction(
             &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr));
         FoundAny = true;
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index adccf2090c36f..8121fffb93c9f 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -46,7 +46,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
   for (BinaryBasicBlock &BB : BF) {
     for (auto It = BB.begin(); It != BB.end(); ++It) {
       MCInst &Inst = *It;
-      if ((BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) &&
+      if ((BC.MIB->isPSignOnLR(Inst) ||
+           (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) &&
           !BC.MIB->hasNegateRAState(Inst)) {
         // no .cfi_negate_ra_state attached to signing or authenticating instr
         // means, that this is a function with handwritten assembly, which might
@@ -71,7 +72,7 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
       if (BC.MIB->isCFI(Inst))
         continue;
 
-      if (BC.MIB->isPSign(Inst)) {
+      if (BC.MIB->isPSignOnLR(Inst)) {
         if (RAState) {
           // RA signing instructions should only follow unsigned RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
@@ -80,7 +81,7 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
           return;
         }
         BC.MIB->setRASigning(Inst);
-      } else if (BC.MIB->isPAuth(Inst)) {
+      } else if (BC.MIB->isPAuthOnLR(Inst)) {
         if (!RAState) {
           // RA authenticating instructions should only follow signed RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index c3d48d3699ede..c4db376a52f32 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -196,6 +196,37 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return {AArch64::LR};
   }
 
+  ErrorOr<MCPhysReg> getSignedReg(const MCInst &Inst) const override {
+    switch (Inst.getOpcode()) {
+    case AArch64::PACIAZ:
+    case AArch64::PACIBZ:
+    case AArch64::PACIASP:
+    case AArch64::PACIBSP:
+    case AArch64::PACIASPPC:
+    case AArch64::PACIBSPPC:
+      return AArch64::LR;
+
+    case AArch64::PACIA1716:
+    case AArch64::PACIB1716:
+    case AArch64::PACIA171615:
+    case AArch64::PACIB171615:
+      return AArch64::X17;
+
+    case AArch64::PACIA:
+    case AArch64::PACIB:
+    case AArch64::PACDA:
+    case AArch64::PACDB:
+    case AArch64::PACIZA:
+    case AArch64::PACIZB:
+    case AArch64::PACDZA:
+    case AArch64::PACDZB:
+      return Inst.getOperand(0).getReg();
+
+    default:
+      return getNoRegister();
+    }
+  }
+
   ErrorOr<MCPhysReg> getAuthenticatedReg(const MCInst &Inst) const override {
     switch (Inst.getOpcode()) {
     case AArch64::AUTIAZ:
@@ -250,6 +281,30 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
   }
 
+  bool isPSignOnLR(const MCInst &Inst) const override {
+    ErrorOr<MCPhysReg> SignReg = getSignedReg(Inst);
+    if (SignReg && *SignReg != getNoRegister() && *SignReg == AArch64::LR)
+      return true;
+
+    return false;
+  }
+
+  bool isPAuthOnLR(const MCInst &Inst) const override {
+    ErrorOr<MCPhysReg> AutReg = getAuthenticatedReg(Inst);
+    if (AutReg && *AutReg != getNoRegister() && *AutReg == AArch64::LR)
+      return true;
+    return false;
+  }
+
+  bool isPAuthAndRet(const MCInst &Inst) const override {
+    return Inst.getOpcode() == AArch64::RETAA ||
+           Inst.getOpcode() == AArch64::RETAB ||
+           Inst.getOpcode() == AArch64::RETAASPPCi ||
+           Inst.getOpcode() == AArch64::RETABSPPCi ||
+           Inst.getOpcode() == AArch64::RETAASPPCr ||
+           Inst.getOpcode() == AArch64::RETABSPPCr;
+  }
+
   bool isAuthenticationOfReg(const MCInst &Inst, MCPhysReg Reg) const override {
     if (Reg == getNoRegister())
       return false;
@@ -676,30 +731,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
     return false;
   }
-  bool isPAuth(MCInst &Inst) const override {
-    return Inst.getOpcode() == AArch64::AUTIA ||
-           Inst.getOpcode() == AArch64::AUTIB ||
-           Inst.getOpcode() == AArch64::AUTIA1716 ||
-           Inst.getOpcode() == AArch64::AUTIB1716 ||
-           Inst.getOpcode() == AArch64::AUTIASP ||
-           Inst.getOpcode() == AArch64::AUTIBSP ||
-           Inst.getOpcode() == AArch64::AUTIAZ ||
-           Inst.getOpcode() == AArch64::AUTIBZ ||
-           Inst.getOpcode() == AArch64::AUTIZA ||
-           Inst.getOpcode() == AArch64::AUTIZB;
-  }
-  bool isPSign(MCInst &Inst) const override {
-    return Inst.getOpcode() == AArch64::PACIA ||
-           Inst.getOpcode() == AArch64::PACIB ||
-           Inst.getOpcode() == AArch64::PACIA1716 ||
-           Inst.getOpcode() == AArch64::PACIB1716 ||
-           Inst.getOpcode() == AArch64::PACIASP ||
-           Inst.getOpcode() == AArch64::PACIBSP ||
-           Inst.getOpcode() == AArch64::PACIAZ ||
-           Inst.getOpcode() == AArch64::PACIBZ ||
-           Inst.getOpcode() == AArch64::PACIZA ||
-           Inst.getOpcode() == AArch64::PACIZB;
-  }
 
   bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From,
                       MCPhysReg &To) const override {

>From ef432ae04e19d4ad15c9bdd7d50af1e2decc2e6c Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 30 Apr 2025 10:15:54 +0200
Subject: [PATCH 13/15] [BOLT] Codereview changes #120064

- unittests
- test/match_dwarf.py
---
 ...ra-state.s => negate-ra-state-incorrect.s} |  8 ++--
 ...le-negate-ra-state.s => negate-ra-state.s} |  6 +--
 bolt/test/match_dwarf.py                      | 40 +++++++++++++++----
 3 files changed, 40 insertions(+), 14 deletions(-)
 rename bolt/test/AArch64/{incorrect-negate-ra-state.s => negate-ra-state-incorrect.s} (78%)
 rename bolt/test/AArch64/{simple-negate-ra-state.s => negate-ra-state.s} (86%)

diff --git a/bolt/test/AArch64/incorrect-negate-ra-state.s b/bolt/test/AArch64/negate-ra-state-incorrect.s
similarity index 78%
rename from bolt/test/AArch64/incorrect-negate-ra-state.s
rename to bolt/test/AArch64/negate-ra-state-incorrect.s
index 790c090d5a670..83def97755a33 100644
--- a/bolt/test/AArch64/incorrect-negate-ra-state.s
+++ b/bolt/test/AArch64/negate-ra-state-incorrect.s
@@ -5,10 +5,14 @@
 # CHECK: BOLT-INFO: inconsistent RAStates in function foo
 
 # check that foo got Ignored, so it's not in the new .text section
-# llvm-objdump %t.exe -d -j .text > %t.exe.dump
+# RUN: llvm-objdump %t.exe.bolt -d -j .text > %t.exe.dump
 # RUN: not grep "<foo>:" %t.exe.dump
 
 
+# Why is this test incorrect?
+#   There is an extra .cfi_negate_ra_state in the body of foo.
+#   Because of this, we will get to the autiasp (hint #29)
+#   in a (seemingly) unsigned state. That is incorrect.
   .text
   .globl  foo
   .p2align        2
@@ -37,5 +41,3 @@ foo:
   .type _start, %function
 _start:
   b foo
-
-.reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/simple-negate-ra-state.s b/bolt/test/AArch64/negate-ra-state.s
similarity index 86%
rename from bolt/test/AArch64/simple-negate-ra-state.s
rename to bolt/test/AArch64/negate-ra-state.s
index 2db0ffe20112e..441da0eb4e9cf 100644
--- a/bolt/test/AArch64/simple-negate-ra-state.s
+++ b/bolt/test/AArch64/negate-ra-state.s
@@ -3,15 +3,15 @@
 
 # RUN: llvm-objdump %t.exe -d > %t.exe.dump
 # RUN: llvm-objdump --dwarf=frames %t.exe -D > %t.exe.dump-dwarf
-# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > orig.txt
+# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > %t.match-dwarf.txt
 
 # RUN: llvm-bolt %t.exe -o %t.exe.bolt
 
 # RUN: llvm-objdump %t.exe.bolt -d > %t.exe.bolt.dump
 # RUN: llvm-objdump --dwarf=frames %t.exe.bolt  > %t.exe.bolt.dump-dwarf
-# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > bolted.txt
+# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > %t.bolt.match-dwarf.txt
 
-# RUN: diff orig.txt bolted.txt
+# RUN: diff %t.match-dwarf.txt %t.bolt.match-dwarf.txt
 
 	.text
 	.globl	foo
diff --git a/bolt/test/match_dwarf.py b/bolt/test/match_dwarf.py
index 395d2fdc19016..c0a5a91fb9725 100755
--- a/bolt/test/match_dwarf.py
+++ b/bolt/test/match_dwarf.py
@@ -5,6 +5,14 @@
 # by address to function names (which are parsed from a normal objdump).
 # The script is used for checking if .cfi_negate_ra_state CFIs
 # are generated by BOLT the same way they are generated by LLVM.
+# The script is called twice in unittests: once with the objdumps of
+# the BOLT input binary, and once with those of the BOLT output binary.
+# We output the offsets of .cfi_negate_ra_state instructions from the
+# function's start address to check that BOLT generates them at the same
+# locations.
+# Because we check the locations, this is only useful for testing without
+# optimization flags, e.g. `llvm-bolt input.exe -o output.exe`.
+
 
 import argparse
 import subprocess
@@ -29,11 +37,17 @@ def print(self):
         print(self.name)
         print(self.body)
 
-    def parse_negates(self):
+    def parse_negate_offsets(self):
+        """
+        Create a list of locations/offsets of the negate_ra_state
+        CFIs in the dwarf entry.
+        To find offsets for each, we match the DW_CFA_advance_loc entries,
+        and sum up their values.
+        """
         negate_offsets = []
         loc = 0
         # TODO: make sure this is not printed in hex
-        re_advloc = f"DW_CFA_advance_loc: (\d+)"
+        re_advloc = r"DW_CFA_advance_loc: (\d+)"
 
         for line in self.body.splitlines():
             # if line matches advance_loc int
@@ -49,9 +63,12 @@ def __eq__(self, other):
         return self.name == other.name and self.negate_offsets == other.negate_offsets
 
 
-def parse_objdump(objdump):
+def extract_function_addresses(objdump):
     """
     Parse and return address-to-name dictionary from objdump file
+    Function names in the objdump look like this:
+        000123abc <foo>:
+    We want to create a dict from the addr (000123abc), to the name (foo).
     """
     addr_name_dict = dict()
     re_function = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$")
@@ -67,12 +84,19 @@ def parse_objdump(objdump):
     return addr_name_dict
 
 
-def parse_dwarf(dwarfdump, addr_name_dict):
+def match_dwarf_to_name(dwarfdump, addr_name_dict):
     """
     Parse dwarf dump, and match names to blocks using the dict from the objdump.
     Return a list of NameDwarfPairs.
+    The matched lines look like this:
+    000123 000456 000789 FDE cie=000000  pc=0123abc...0456def
+    We do not have the function name for this, only the PC range it applies to.
+    We need to find the pc=0123abc (the start address), and find the matching name from
+    the addr_name_dict.
+    The result NameDwarfPair will hold the lines this header applied to, and instead of
+    the header with the addresses, it will just have the function name.
     """
-    re_address_line = re.compile(r".*pc=([0-9a-fA-F]{8})\.\.\.([0-9a-fA-F]{8})")
+    re_address_line = re.compile(r".*pc=([0-9a-fA-F]+)\.\.\.([0-9a-fA-F]+)")
     with open(dwarfdump, "r") as dw:
         functions = []
         for line in dw.readlines():
@@ -98,12 +122,12 @@ def main():
 
     args = parser.parse_args()
 
-    addr_name_dict = parse_objdump(args.objdump)
-    functions = parse_dwarf(args.dwarfdump, addr_name_dict)
+    addr_name_dict = extract_function_addresses(args.objdump)
+    functions = match_dwarf_to_name(args.dwarfdump, addr_name_dict)
 
     for f in functions:
         if f.name == args.function:
-            f.parse_negates()
+            f.parse_negate_offsets()
             print(f.negate_offsets)
             break
     else:

>From 84ab9475e93ef3aaa0bbfd462a0216f0e0b9141b Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 5 May 2025 10:18:14 +0200
Subject: [PATCH 14/15] [BOLT] Add allow-experimental-pacret flag

- put feature behind new flag.
- to not fail because of the missing flag, read OpNegateRAState CFIs
    normally from binaries.
- in the beginning of MarkRAStates Pass, remove all OpNegateRAState CFIs.
---
 bolt/include/bolt/Utils/CommandLineOpts.h |  1 +
 bolt/lib/Core/BinaryFunction.cpp          | 32 ++++++++++++++---------
 bolt/lib/Core/Exceptions.cpp              |  8 ++++++
 bolt/lib/Passes/MarkRAStates.cpp          | 25 ++++++++++++++++++
 bolt/lib/Rewrite/BinaryPassManager.cpp    |  5 ++++
 5 files changed, 58 insertions(+), 13 deletions(-)

diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index 3de945f6a1507..34e96dadf33d1 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -53,6 +53,7 @@ extern llvm::cl::opt<std::string> OutputFilename;
 extern llvm::cl::opt<std::string> PerfData;
 extern llvm::cl::opt<bool> PrintCacheMetrics;
 extern llvm::cl::opt<bool> PrintSections;
+extern llvm::cl::opt<bool> AllowPacret;
 
 // The format to use with -o in aggregation mode (perf2bolt)
 enum ProfileFormatKind { PF_Fdata, PF_YAML };
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index e2540a8077c9a..9f05bd5db1f95 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -64,6 +64,7 @@ extern cl::opt<bool> Instrument;
 extern cl::opt<bool> StrictMode;
 extern cl::opt<bool> UpdateDebugSections;
 extern cl::opt<unsigned> Verbosity;
+extern cl::opt<bool> AllowPacret;
 
 extern bool BinaryAnalysisMode;
 extern bool HeatmapMode;
@@ -2769,10 +2770,12 @@ struct CFISnapshot {
       llvm_unreachable("unsupported CFI opcode");
       break;
     case MCCFIInstruction::OpNegateRAState:
-      if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) {
-        llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. "
-                         "as produced by '-mbranch-protection=pac-ret') are "
-                         "currently not supported by BOLT.");
+      if (!(opts::BinaryAnalysisMode || opts::HeatmapMode ||
+            opts::AllowPacret)) {
+        llvm_unreachable(
+            "BOLT-ERROR: support for binaries using pac-ret hardening (e.g. as "
+            "produced by '-mbranch-protection=pac-ret') is experimental\n"
+            "BOLT-ERROR: set --allow-experimental-pacret to allow processing");
       }
       break;
     case MCCFIInstruction::OpRememberState:
@@ -2788,7 +2791,6 @@ struct CFISnapshot {
   void advanceTo(int32_t State) {
     for (int32_t I = CurState, E = State; I != E; ++I) {
       const MCCFIInstruction &Instr = FDE[I];
-      assert(Instr.getOperation() != MCCFIInstruction::OpNegateRAState);
       if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) {
         update(Instr, I);
         continue;
@@ -2916,10 +2918,12 @@ struct CFISnapshotDiff : public CFISnapshot {
       llvm_unreachable("unsupported CFI opcode");
       return false;
     case MCCFIInstruction::OpNegateRAState:
-      if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) {
-        llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. "
-                         "as produced by '-mbranch-protection=pac-ret') are "
-                         "currently not supported by BOLT.");
+      if (!(opts::BinaryAnalysisMode || opts::HeatmapMode ||
+            opts::AllowPacret)) {
+        llvm_unreachable(
+            "BOLT-ERROR: support for binaries using pac-ret hardening (e.g. as "
+            "produced by '-mbranch-protection=pac-ret') is experimental\n"
+            "BOLT-ERROR: set --allow-experimental-pacret to allow processing");
       }
       break;
     case MCCFIInstruction::OpRememberState:
@@ -3073,10 +3077,12 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
       llvm_unreachable("unsupported CFI opcode");
       break;
     case MCCFIInstruction::OpNegateRAState:
-      if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) {
-        llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. "
-                         "as produced by '-mbranch-protection=pac-ret') are "
-                         "currently not supported by BOLT.");
+      if (!(opts::BinaryAnalysisMode || opts::HeatmapMode ||
+            opts::AllowPacret)) {
+        llvm_unreachable(
+            "BOLT-ERROR: support for binaries using pac-ret hardening (e.g. as "
+            "produced by '-mbranch-protection=pac-ret') is experimental\n"
+            "BOLT-ERROR: set --allow-experimental-pacret to allow processing");
       }
       break;
     case MCCFIInstruction::OpGnuArgsSize:
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 63b7ad43b1dec..d2cdb7b28272c 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -650,6 +650,14 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
         // the RA State. The actual state for instructions are worked out in
         // MarkRAStates based on these annotations.
         Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state, Offset);
+        // To have the --allow-experimental-pacret flag, we have to add the
+        // OpNegateRAState CFI, and remove it later in MarkRAStates. Unittests
+        // on AArch64 would be broken otherwise, as some AArch64 platforms will
+        // have pac-ret for linker inserted functions, e.g.
+        // __do_global_dtors_aux. The user cannot remove the
+        // .cfi_negate_ra_state from such functions.
+        Function.addCFIInstruction(
+            Offset, MCCFIInstruction::createNegateRAState(nullptr));
         break;
       }
       if (opts::Verbosity >= 1)
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp
index 8121fffb93c9f..4cdeb42240f7f 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/MarkRAStates.cpp
@@ -43,6 +43,25 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
 
   BinaryContext &BC = BF.getBinaryContext();
 
+  // Because of the --allow-experimental-pacret flag,
+  // we cannot remove all OpNegateRAStates in fillCFIInfoFor,
+  // but we still need to remove them here, because their pre-optimization
+  // locations would be incorrect after optimizations.
+  std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
+  for (BinaryBasicBlock *BB : Blocks) {
+    for (auto II = BB->begin(); II != BB->end();) {
+      MCInst &Instr = *II;
+      if (BC.MIB->isCFI(Instr)) {
+        const MCCFIInstruction *CFI = BF.getCFIFor(Instr);
+        if (CFI->getOperation() == MCCFIInstruction::OpNegateRAState) {
+          II = BB->erasePseudoInstruction(II);
+          continue;
+        }
+      }
+      ++II;
+    }
+  }
+
   for (BinaryBasicBlock &BB : BF) {
     for (auto It = BB.begin(); It != BB.end(); ++It) {
       MCInst &Inst = *It;
@@ -57,6 +76,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
         BF.setIgnored();
         BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                   << BF.getPrintName() << "\n";
+        BC.outs()
+            << "BOLT-INFO: ptr sign/auth inst without .cfi_negate_ra_state\n";
         return;
       }
     }
@@ -77,6 +98,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
           // RA signing instructions should only follow unsigned RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                     << BF.getPrintName() << "\n";
+          BC.outs() << "BOLT-INFO: ptr signing inst encountered in Signed RA "
+                       "state.\n";
           BF.setIgnored();
           return;
         }
@@ -86,6 +109,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) {
           // RA authenticating instructions should only follow signed RA state.
           BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
                     << BF.getPrintName() << "\n";
+          BC.outs() << "BOLT-INFO: ptr authenticating inst encountered in "
+                       "Unsigned RA state.\n";
           BF.setIgnored();
           return;
         }
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index ac50168ff67a8..fb4d2ef0a7de2 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -275,6 +275,11 @@ static cl::opt<bool> ShortenInstructions("shorten-instructions",
                                          cl::desc("shorten instructions"),
                                          cl::init(true),
                                          cl::cat(BoltOptCategory));
+
+cl::opt<bool> AllowPacret(
+    "allow-experimental-pacret",
+    cl::desc("Enable processing binaries with pac-ret (experimental)"),
+    cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace llvm {

>From d80694bb7f90e18940f8bba68c69c1ed0c46a615 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Mon, 5 May 2025 14:57:07 +0200
Subject: [PATCH 15/15] [BOLT] Disallow Instrumentation mode for pac-ret
 binaries

- also remove an unneeded assertion
---
 bolt/include/bolt/Passes/InsertNegateRAStatePass.h | 1 +
 bolt/lib/Rewrite/BinaryPassManager.cpp             | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
index ce73b5a152d12..0294ded2c51cb 100644
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
@@ -38,6 +38,7 @@ class InsertNegateRAState : public BinaryFunctionPass {
   /// Because states are tracked as MCAnnotations on individual instructions,
   /// newly inserted instructions do not have a state associated with them.
   /// New states are "inherited" from the last known state.
+  /// TODO: implement a better strategy for Instrumentation mode.
   void fixUnknownStates(BinaryFunction &BF);
 };
 
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index fb4d2ef0a7de2..664ad9b72eba2 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -395,9 +395,14 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
 
   Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
 
-  if (opts::Instrument)
+  if (opts::Instrument) {
+    if (opts::AllowPacret) {
+      BC.errs() << "BOLT-ERROR: Instrumenting binaries with pac-ret hardening "
+                   "is not supported.\n";
+      exit(1);
+    }
     Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint));
-  else if (opts::Hugify)
+  } else if (opts::Hugify)
     Manager.registerPass(std::make_unique<HugePage>(NeverPrint));
 
   Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint),



More information about the llvm-commits mailing list