[llvm-branch-commits] [clang] [KeyInstr][Clang] Add ApplyAtomGroup (PR #134632)

Orlando Cazalet-Hyams via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Apr 7 06:47:39 PDT 2025


https://github.com/OCHyams created https://github.com/llvm/llvm-project/pull/134632

This is a scoped helper similar to ApplyDebugLocation that creates a new source
atom group which instructions can be added to.

A source atom is a source construct that is "interesting" for debug stepping
purposes. We use an atom group number to track the instruction(s) that implement
the functionality for the atom, plus backup instructions/source locations.

---

This patch is part of a stack that teaches Clang to generate Key Instructions
metadata for C and C++.

The Key Instructions project is introduced, including a "quick summary" section
at the top which adds context for this PR, here:
https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668

The feature is only functional in LLVM if LLVM is built with CMake flag
LLVM_EXPERIMENTAL_KEY_INSTRUCTIONs. Eventually that flag will be removed.

The Clang-side work is demoed here:
https://github.com/llvm/llvm-project/pull/130943

>From 856f99ea6b9daa095733d107915a02722ba9403d Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Tue, 1 Apr 2025 11:59:24 +0100
Subject: [PATCH] [KeyInstr][Clang] Add ApplyAtomGroup

This is a scoped helper similar to ApplyDebugLocation that creates a new source
atom group which instructions can be added to.

A source atom is a source construct that is "interesting" for debug stepping
purposes. We use an atom group number to track the instruction(s) that implement
the functionality for the atom, plus backup instructions/source locations.

---

This patch is part of a stack that teaches Clang to generate Key Instructions
metadata for C and C++.

The Key Instructions project is introduced, including a "quick summary" section
at the top which adds context for this PR, here:
https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668

The feature is only functional in LLVM if LLVM is built with CMake flag
LLVM_EXPERIMENTAL_KEY_INSTRUCTIONs. Eventually that flag will be removed.

The Clang-side work is demoed here:
https://github.com/llvm/llvm-project/pull/130943
---
 clang/lib/CodeGen/CGDebugInfo.cpp   | 119 +++++++++++++++++++++++++++-
 clang/lib/CodeGen/CGDebugInfo.h     |  50 ++++++++++++
 clang/lib/CodeGen/CodeGenFunction.h |  14 ++++
 3 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 54025b767dc81..9fe1f24317b8a 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -43,6 +43,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Metadata.h"
@@ -52,6 +53,7 @@
 #include "llvm/Support/SHA1.h"
 #include "llvm/Support/SHA256.h"
 #include "llvm/Support/TimeProfiler.h"
+#include <cstdint>
 #include <optional>
 using namespace clang;
 using namespace clang::CodeGen;
@@ -119,6 +121,114 @@ CGDebugInfo::~CGDebugInfo() {
          "Region stack mismatch, stack not empty!");
 }
 
+void CGDebugInfo::addInstSourceAtomMetadata(llvm::Instruction *I,
+                                            uint64_t Group, uint8_t Rank) {
+  if (!I->getDebugLoc() || Group == 0 || !I->getDebugLoc()->getLine())
+    return;
+
+  // Saturate the 3-bit rank.
+  Rank = std::min<uint8_t>(Rank, 7);
+
+  const llvm::DebugLoc &DL = I->getDebugLoc();
+
+  // Each instruction can only be attributed to one source atom (a limitation of
+  // the implementation). If this instruction is already part of a source atom,
+  // pick the group in which it has highest precedence (lowest rank).
+  if (DL.get()->getAtomGroup() && DL.get()->getAtomRank() &&
+      DL.get()->getAtomRank() < Rank) {
+    Group = DL.get()->getAtomGroup();
+    Rank = DL.get()->getAtomRank();
+  }
+
+  // Update the function-local watermark so we don't reuse this number for
+  // another atom.
+  KeyInstructionsInfo.HighestEmittedAtom =
+      std::max(Group, KeyInstructionsInfo.HighestEmittedAtom);
+
+  // Apply the new DILocation to the instruction.
+  llvm::DILocation *NewDL = llvm::DILocation::get(
+      I->getContext(), DL.getLine(), DL.getCol(), DL.getScope(),
+      DL.getInlinedAt(), DL.isImplicitCode(), Group, Rank);
+  I->setDebugLoc(NewDL);
+};
+
+void CGDebugInfo::addInstToCurrentSourceAtom(llvm::Instruction *KeyInstruction,
+                                             llvm::Value *Backup) {
+  if (!CGM.getCodeGenOpts().DebugKeyInstructions)
+    return;
+
+  uint64_t Group = KeyInstructionsInfo.CurrentAtom;
+  if (!Group)
+    return;
+
+  addInstSourceAtomMetadata(KeyInstruction, Group, /*Rank=*/1);
+
+  llvm::Instruction *BackupI =
+      llvm::dyn_cast_or_null<llvm::Instruction>(Backup);
+  if (!BackupI)
+    return;
+
+  // Add the backup instruction to the group.
+  addInstSourceAtomMetadata(BackupI, Group, /*Rank=*/2);
+
+  // Look through chains of casts too, as they're probably going to evaporate.
+  // FIXME: And other nops like zero length geps?
+  // FIXME: Should use Cast->isNoopCast()?
+  uint8_t Rank = 3;
+  while (auto *Cast = dyn_cast<llvm::CastInst>(BackupI)) {
+    BackupI = dyn_cast<llvm::Instruction>(Cast->getOperand(0));
+    if (!BackupI)
+      break;
+    addInstSourceAtomMetadata(BackupI, Group, Rank++);
+  }
+}
+
+void CGDebugInfo::addRetToOverrideOrNewSourceAtom(llvm::ReturnInst *Ret,
+                                                  llvm::Value *Backup) {
+  if (KeyInstructionsInfo.RetAtomOverride) {
+    uint64_t CurrentAtom = KeyInstructionsInfo.CurrentAtom;
+    KeyInstructionsInfo.CurrentAtom = KeyInstructionsInfo.RetAtomOverride;
+    addInstToCurrentSourceAtom(Ret, Backup);
+    KeyInstructionsInfo.CurrentAtom = CurrentAtom;
+    KeyInstructionsInfo.RetAtomOverride = 0;
+  } else {
+    auto Grp = ApplyAtomGroup(this);
+    addInstToCurrentSourceAtom(Ret, Backup);
+  }
+}
+
+void CGDebugInfo::setRetInstSourceAtomOverride(uint64_t Group) {
+  assert(KeyInstructionsInfo.RetAtomOverride == 0);
+  KeyInstructionsInfo.RetAtomOverride = Group;
+}
+
+void CGDebugInfo::completeFunction() {
+  // Reset the atom group number tracker as the numbers are function-local.
+  KeyInstructionsInfo.NextAtom = 1;
+  KeyInstructionsInfo.HighestEmittedAtom = 0;
+  KeyInstructionsInfo.CurrentAtom = 0;
+  KeyInstructionsInfo.RetAtomOverride = 0;
+}
+
+ApplyAtomGroup::ApplyAtomGroup(CGDebugInfo *DI) : DI(DI) {
+  if (!DI)
+    return;
+  OriginalAtom = DI->KeyInstructionsInfo.CurrentAtom;
+  DI->KeyInstructionsInfo.CurrentAtom = DI->KeyInstructionsInfo.NextAtom++;
+}
+
+ApplyAtomGroup::~ApplyAtomGroup() {
+  if (!DI)
+    return;
+
+  // We may not have used the group number at all.
+  DI->KeyInstructionsInfo.NextAtom =
+      std::min(DI->KeyInstructionsInfo.HighestEmittedAtom + 1,
+               DI->KeyInstructionsInfo.NextAtom);
+
+  DI->KeyInstructionsInfo.CurrentAtom = OriginalAtom;
+}
+
 ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF,
                                        SourceLocation TemporaryLocation)
     : CGF(&CGF) {
@@ -174,8 +284,15 @@ ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, llvm::DebugLoc Loc)
     return;
   }
   OriginalLocation = CGF.Builder.getCurrentDebugLocation();
-  if (Loc)
+  if (Loc) {
+    // Key Instructions: drop the atom group and rank to avoid accidentally
+    // propagating it around.
+    if (Loc->getAtomGroup())
+      Loc = llvm::DILocation::get(Loc->getContext(), Loc.getLine(),
+                                  Loc->getColumn(), Loc->getScope(),
+                                  Loc->getInlinedAt(), Loc.isImplicitCode());
     CGF.Builder.SetCurrentDebugLocation(std::move(Loc));
+  }
 }
 
 ApplyDebugLocation::~ApplyDebugLocation() {
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 9db5113fe5d8e..e948298d517da 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -58,6 +58,8 @@ class CGBlockInfo;
 class CGDebugInfo {
   friend class ApplyDebugLocation;
   friend class SaveAndRestoreLocation;
+  friend class ApplyAtomGroup;
+
   CodeGenModule &CGM;
   const llvm::codegenoptions::DebugInfoKind DebugKind;
   bool DebugTypeExtRefs;
@@ -179,6 +181,17 @@ class CGDebugInfo {
   /// The key is coroutine real parameters, value is DIVariable in LLVM IR.
   Param2DILocTy ParamDbgMappings;
 
+  /// Key Instructions bookkeeping.
+  /// Source atoms are identified by a {AtomGroup, InlinedAt} pair, meaning
+  /// AtomGroup numbers can be repeated across different functions.
+  struct {
+    uint64_t NextAtom = 1;
+    uint64_t HighestEmittedAtom = 0;
+    uint64_t CurrentAtom = 0;
+    uint64_t RetAtomOverride = 0;
+  } KeyInstructionsInfo;
+
+private:
   /// Helper functions for getOrCreateType.
   /// @{
   /// Currently the checksum of an interface includes the number of
@@ -636,7 +649,30 @@ class CGDebugInfo {
                                                 StringRef Category,
                                                 StringRef FailureMsg);
 
+  /// Reset internal state.
+  void completeFunction();
+
+  /// Add \p KeyInstruction and an optional \p Backup instruction to the
+  /// current atom group, created using ApplyAtomGroup.
+  void addInstToCurrentSourceAtom(llvm::Instruction *KeyInstruction,
+                                  llvm::Value *Backup);
+
+  /// Add \p Ret and an optional \p Backup instruction to the
+  /// saved override used for some ret instructions if it exists, or a new atom.
+  void addRetToOverrideOrNewSourceAtom(llvm::ReturnInst *Ret,
+                                       llvm::Value *Backup);
+
+  /// Set an atom group override for use in addRetToOverrideOrNewSourceAtom.
+  void setRetInstSourceAtomOverride(uint64_t Group);
+
 private:
+  /// Amend \p I's DebugLoc with \p Group (its source atom group) and \p
+  /// Rank (lower nonzero rank is higher precedence). Does nothing if \p I
+  /// has no DebugLoc, and chooses the atom group in which the instruction
+  /// has the highest precedence if it's already in one.
+  void addInstSourceAtomMetadata(llvm::Instruction *I, uint64_t Group,
+                                 uint8_t Rank);
+
   /// Emit call to llvm.dbg.declare for a variable declaration.
   /// Returns a pointer to the DILocalVariable associated with the
   /// llvm.dbg.declare, or nullptr otherwise.
@@ -853,6 +889,20 @@ class CGDebugInfo {
   }
 };
 
+/// A scoped helper to set the current source atom group for
+/// CGDebugInfo::addInstToCurrentSourceAtom. A source atom is a source construct
+/// that is "interesting" for debug stepping purposes. We use an atom group
+/// number to track the instruction(s) that implement the functionality for the
+/// atom, plus backup instructions/source locations.
+class ApplyAtomGroup {
+  uint64_t OriginalAtom = 0;
+  CGDebugInfo *DI = nullptr;
+
+public:
+  ApplyAtomGroup(CGDebugInfo *DI);
+  ~ApplyAtomGroup();
+};
+
 /// A scoped helper to set the current debug location to the specified
 /// location or preferred location of the specified Expr.
 class ApplyDebugLocation {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ca00a0e8c6cf4..0c0019fbea5ea 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1761,6 +1761,20 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// recently incremented counter.
   uint64_t getCurrentProfileCount() { return PGO.getCurrentRegionCount(); }
 
+  /// See CGDebugInfo::addInstToCurrentSourceAtom.
+  void addInstToCurrentSourceAtom(llvm::Instruction *KeyInstruction,
+                                  llvm::Value *Backup) {
+    if (CGDebugInfo *DI = getDebugInfo())
+      DI->addInstToCurrentSourceAtom(KeyInstruction, Backup);
+  }
+
+  /// See CGDebugInfo::addRetToOverrideOrNewSourceAtom.
+  void addRetToOverrideOrNewSourceAtom(llvm::ReturnInst *Ret,
+                                       llvm::Value *Backup) {
+    if (CGDebugInfo *DI = getDebugInfo())
+      DI->addRetToOverrideOrNewSourceAtom(Ret, Backup);
+  }
+
 private:
   /// SwitchInsn - This is nearest current switch instruction. It is null if
   /// current context is not in a switch.



More information about the llvm-branch-commits mailing list