[llvm] [BOLT] Improve DWARF CFI generation for pac-ret binaries (PR #163381)

Gergely Bálint via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 28 01:49:19 PST 2025


================
@@ -104,32 +118,122 @@ void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF,
                          MCCFIInstruction::createNegateRAState(nullptr));
 }
 
-void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) {
+std::optional<bool>
+InsertNegateRAState::getFirstKnownRAState(BinaryContext &BC,
+                                          BinaryBasicBlock &BB) {
+  for (const MCInst &Inst : BB) {
+    if (BC.MIB->isCFI(Inst))
+      continue;
+    std::optional<bool> RAState = BC.MIB->getRAState(Inst);
+    if (RAState.has_value())
+      return RAState;
+  }
+  return std::nullopt;
+}
+
+void InsertNegateRAState::fillUnknownStateInBB(BinaryContext &BC,
+                                               BinaryBasicBlock &BB) {
+
+  auto First = BB.getFirstNonPseudo();
+  if (First == BB.end())
+    return;
+  // If the first instruction has unknown RAState, we should copy the first
+  // known RAState.
+  std::optional<bool> RAState = BC.MIB->getRAState(*First);
+  if (!RAState.has_value()) {
+    std::optional<bool> FirstRAState = getFirstKnownRAState(BC, BB);
+    if (!FirstRAState.has_value())
+      // We fill unknown BBs later.
+      return;
+
+    BC.MIB->setRAState(*First, *FirstRAState);
+  }
+
+  // At this point we know the RAState of the first instruction,
+  // so we can propagate the RAStates to all subsequent unknown instructions.
+  MCInst Prev = *First;
+  for (auto It = First + 1; It != BB.end(); ++It) {
+    MCInst &Inst = *It;
+    if (BC.MIB->isCFI(Inst))
+      continue;
+
+    // No need to check for nullopt: we only entered this loop after the first
+    // instruction had its RAState set, and RAState is always set for the
+    // previous instruction in the previous iteration of the loop.
+    std::optional<bool> PrevRAState = BC.MIB->getRAState(Prev);
+
+    std::optional<bool> RAState = BC.MIB->getRAState(Inst);
+    if (!RAState.has_value()) {
+      if (BC.MIB->isPSignOnLR(Prev))
+        PrevRAState = true;
+      else if (BC.MIB->isPAuthOnLR(Prev))
+        PrevRAState = false;
+      BC.MIB->setRAState(Inst, *PrevRAState);
+    }
+    Prev = Inst;
+  }
+}
+
+bool InsertNegateRAState::isUnknownBlock(BinaryContext &BC,
+                                         BinaryBasicBlock &BB) {
+  for (const MCInst &Inst : BB) {
+    if (BC.MIB->isCFI(Inst))
+      continue;
+    std::optional<bool> RAState = BC.MIB->getRAState(Inst);
+    if (RAState.has_value())
+      return false;
+  }
+  return true;
+}
+
+void InsertNegateRAState::markUnknownBlock(BinaryContext &BC,
+                                           BinaryBasicBlock &BB, bool State) {
+  // If we call this when an Instruction has either kRASigned or kRAUnsigned
+  // annotation, setRASigned or setRAUnsigned would fail.
+  assert(isUnknownBlock(BC, BB) &&
+         "markUnknownBlock should only be called on unknown blocks");
+  for (MCInst &Inst : BB) {
+    if (BC.MIB->isCFI(Inst))
+      continue;
+    BC.MIB->setRAState(Inst, State);
+  }
+}
+
+void InsertNegateRAState::fillUnknownStubs(BinaryFunction &BF) {
   BinaryContext &BC = BF.getBinaryContext();
   bool FirstIter = true;
   MCInst PrevInst;
-  for (BinaryBasicBlock &BB : BF) {
-    for (MCInst &Inst : BB) {
-      if (BC.MIB->isCFI(Inst))
-        continue;
+  for (FunctionFragment &FF : BF.getLayout().fragments()) {
+    for (BinaryBasicBlock *BB : FF) {
+      if (!FirstIter && isUnknownBlock(BC, *BB)) {
+        // As explained in issue #160989, the unwind info is incorrect for
+        // stubs. Indicating the correct RAState without the rest of the unwind
+        // info being correct is not useful. Instead, we copy the RAState from
+        // the previous instruction.
+        std::optional<bool> PrevRAState = BC.MIB->getRAState(PrevInst);
+        if (!PrevRAState.has_value()) {
+          llvm_unreachable(
+              "Previous Instruction has no RAState in fillUnknownStubs.");
+          continue;
+        }
 
-      auto RAState = BC.MIB->getRAState(Inst);
-      if (!FirstIter && !RAState) {
         if (BC.MIB->isPSignOnLR(PrevInst))
-          RAState = true;
+          PrevRAState = true;
         else if (BC.MIB->isPAuthOnLR(PrevInst))
-          RAState = false;
-        else {
-          auto PrevRAState = BC.MIB->getRAState(PrevInst);
-          RAState = PrevRAState ? *PrevRAState : false;
-        }
-        BC.MIB->setRAState(Inst, *RAState);
-      } else {
+          PrevRAState = false;
+        markUnknownBlock(BC, *BB, *PrevRAState);
+      }
+      if (FirstIter) {
         FirstIter = false;
-        if (!RAState)
-          BC.MIB->setRAState(Inst, BF.getInitialRAState());
+        if (isUnknownBlock(BC, *BB))
+          // Set block to unsigned, because this is the first block of the
+          // function, and all instruction in it are new instructions generated
+          // by BOLT optimizations.
+          markUnknownBlock(BC, *BB, false);
       }
-      PrevInst = Inst;
+      auto Last = BB->getLastNonPseudo();
+      if (Last != BB->rend())
+        PrevInst = *Last;
----------------
bgergely0 wrote:

Thanks Peter!
I'd prefer to keep the `unreachable`. It is used to indicate the logic error of encountering an instruction that did not get an RAState assigned to it. 

Another reason why PrevInst might not get initialized is if the BB is empty. 
I've encountered situations like this: BOLT would create an empty BB to essentially hold a label to target, and it would fall through to the next block. (I assume the label of the empty block and the label of the next block are "merged" into one in the output binary.)

I think it could be resolved by changing `FirstIter` to `FirstKnownInstFound` or similar, and not entering the "main" logic of the function until one known non-pseudo instruction is found. This way, all issues coming from these empty/pseudo-only blocks would be avoided.

https://github.com/llvm/llvm-project/pull/163381


More information about the llvm-commits mailing list