[llvm] [KeyInstr][DwarfDebug] Add is_stmt emission support (PR #133495)

Fri May 9 04:23:53 PDT 2025

================
@@ -2333,6 +2352,170 @@ DwarfDebug::emitInitialLocDirective(const MachineFunction &MF, unsigned CUID) {
   return PrologEndLoc;
 }
 
+void DwarfDebug::findKeyInstructions(const MachineFunction *MF) {
+  // New function - reset KeyInstructions.
+  KeyInstructions.clear();
+
+  // The current candidate is_stmt instructions for each source atom.
+  // Map {(InlinedAt, Group): (Rank, Instructions)}.
+  DenseMap<std::pair<DILocation *, uint32_t>,
+           std::pair<uint16_t, SmallVector<const MachineInstr *>>>
+      GroupCandidates;
+
+  // For each instruction:
+  //   * Skip insts without DebugLoc, AtomGroup or AtomRank, and line zeros.
+  //   * Check if insts in this group have been seen already in GroupCandidates.
+  //     * If this instr rank is equal, add this instruction to KeyInstructions.
+  //       Remove existing instructions from KeyInstructions if they have the
+  //       same parent.
+  //     * If this instr rank is higher (lower precedence), ignore it.
+  //     * If this instr rank is lower (higher precedence), erase existing
+  //       instructions from KeyInstructions. Add this instr to KeyInstructions.
+
+  for (auto &MBB : *MF) {
+    // Rather than apply is_stmt directly to Key Instructions, we "float"
+    // is_stmt up to the 1st instruction with the same line number in a
+    // contiguous block. That instruction is called the "buoy". The
+    // buoy gets reset if we encouner an instruction with an atom
+    // group.
+    const MachineInstr *Buoy = nullptr;
+    // The atom group number associated with Buoy which may be 0 if we haven't
+    // encountered an atom group yet in this blob of instructions with the same
+    // line number.
+    uint64_t BuoyAtom = 0;
+
+    for (auto &MI : MBB) {
+      if (MI.isMetaInstruction())
+        continue;
+
+      if (!MI.getDebugLoc() || !MI.getDebugLoc().getLine())
+        continue;
+
+      // Reset the Buoy to this instruciton if it has a different line number.
+      if (!Buoy ||
+          Buoy->getDebugLoc().getLine() != MI.getDebugLoc().getLine()) {
+        Buoy = &MI;
+        BuoyAtom = 0;
+      }
+
+      // Call instructions are handled specially - we always mark them as key
+      // regardless of atom info.
+      const auto &TII =
+          *MI.getParent()->getParent()->getSubtarget().getInstrInfo();
+      if (MI.isCall() || TII.isTailCall(MI)) {
+        assert(MI.getDebugLoc() && "Unexpectedly missing DL");
+
+        // Calls are always key.
+        KeyInstructions.insert(Buoy);
+
+        uint64_t Group = MI.getDebugLoc()->getAtomGroup();
+        uint8_t Rank = MI.getDebugLoc()->getAtomRank();
+        if (Group && Rank) {
+          auto *InlinedAt = MI.getDebugLoc()->getInlinedAt();
+          auto &[CandidateRank, CandidateInsts] = GroupCandidates[{InlinedAt, Group}];
+
+          // This looks similar to the non-call handling code, except that
+          // we don't put the call into CandidateInsts so that they can't be
+          // made un-key. As a result, we also have to take special care not
+          // to erase the is_stmt from the buoy, and prevent that happening
+          // in the future.
+
+          if (CandidateRank == Rank) {
+            // We've seen other instructions in this group of this rank. Discard
+            // ones we've seen in this block, keep the others.
+            assert(!CandidateInsts.empty());
+            SmallVector<const MachineInstr *> Insts;
+            Insts.reserve(CandidateInsts.size());
+            for (auto &PrevInst : CandidateInsts) {
+              if (PrevInst->getParent() != MI.getParent())
+                Insts.push_back(PrevInst);
+              else if (PrevInst != Buoy)
+                KeyInstructions.erase(PrevInst);
+            }
+
+            if (Insts.empty()) {
+              CandidateInsts.clear();
+              CandidateRank = 0;
+            } else {
+              CandidateInsts = std::move(Insts);
+            }
+
+          } else if (CandidateRank > Rank) {
+            // We've seen other instructions in this group of lower precedence
+            // (higher rank). Discard them.
+            for (auto *Supplanted : CandidateInsts) {
+              // Don't erase the is_stmt we're using for this call.
+              if (Supplanted != Buoy)
+                KeyInstructions.erase(Supplanted);
+            }
+            CandidateInsts.clear();
+            CandidateRank = 0;
+          }
+        }
+
+        // Avoid floating any future is_stmts up to the call.
+        Buoy = nullptr;
+        continue;
+      }
+
+      auto *InlinedAt = MI.getDebugLoc()->getInlinedAt();
+      uint64_t Group = MI.getDebugLoc()->getAtomGroup();
+      uint8_t Rank = MI.getDebugLoc()->getAtomRank();
+      if (!Group || !Rank)
+        continue;
+
+      // Don't let is_stmts float past instructions from different source atoms.
+      if (BuoyAtom && BuoyAtom != Group) {
+        Buoy = &MI;
+        BuoyAtom = MI.getDebugLoc()->getAtomGroup();
+      }
+
+      auto &[CandidateRank, CandidateInsts] = GroupCandidates[{InlinedAt, Group}];
+
+      if (CandidateRank == 0) {
+        // This is the first time we're seeing an instruction in this atom
+        // group. Add it to the map.
+        assert(CandidateInsts.empty());
+        CandidateRank = Rank;
+        CandidateInsts.push_back(Buoy);
+
+      } else if (CandidateRank == Rank) {
+        // We've seen other instructions in this group of this rank. Discard
+        // ones we've seen in this block, keep the others, add this one.
+        assert(!CandidateInsts.empty());
+        SmallVector<const MachineInstr *> Insts;
+        Insts.reserve(CandidateInsts.size() + 1);
+        for (auto &PrevInst : CandidateInsts) {
+          if (PrevInst->getParent() != MI.getParent())
+            Insts.push_back(PrevInst);
+          else
+            KeyInstructions.erase(PrevInst);
+        }
+        Insts.push_back(Buoy);
+        CandidateInsts = std::move(Insts);
----------------
OCHyams wrote:

Does the first comment / question still apply to this new version of the code?

> Also: this adds the buoy each time, so presumably if you have three identically-sourced instructions in a row, you get three buoys in the candidate list?

If they've got the same atom number, no, there's just one. If they're identical except the atom number yes there will be multiple. That's by design, because IMO we should convey that there's multiple key instructions at the source position and let the debugger do what it wants with that info. (Happy to alter the design if needed, just saying this is working as intended)

https://github.com/llvm/llvm-project/pull/133495