[clang] Add option to generate additional debug info for expression dereferencing pointer to pointers. (PR #81545)

William Junda Huang via cfe-commits cfe-commits at lists.llvm.org
Mon Feb 12 14:35:17 PST 2024


https://github.com/huangjd created https://github.com/llvm/llvm-project/pull/81545

Such an expression does not correspond to a variable in the source code and thus does not have a debug location. When the user collects perf data on the program, if the intermediate memory load instruction is sampled, it cannot be attributed to any variable/class member, which causes the sampling results to be under-counted.
This patch adds an option `-fdebug-info-for-pointer-type` to generate a pseudo variable and its debug info for an intermediate expression with pointer dereferencing, so that perf data collected on the instruction of that expression can be attributed to the correct class member.

This is a prototype so comments are needed.



>From f2c82758e1cba7773e41d941d2812c829c339675 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Mon, 12 Feb 2024 02:27:13 -0500
Subject: [PATCH] Add option to generate additional info for expression
 containing pointer of pointers.

Such an expression does not correspond to a variable in the source code and
thus does not have a debug location. However, the user may want to collect
sampling counters for memory accesses to analyze usage frequency of class
members. By enabling -fdebug-info-for-pointer-type, a pseudo variable and
its debug info are generated in place whenever there's an intermediate
expression with pointer access.
---
 clang/include/clang/Basic/DebugOptions.def |  4 ++
 clang/include/clang/Driver/Options.td      |  4 ++
 clang/lib/CodeGen/CGDebugInfo.cpp          | 16 +++++
 clang/lib/CodeGen/CGDebugInfo.h            |  6 ++
 clang/lib/CodeGen/CGDecl.cpp               |  4 ++
 clang/lib/CodeGen/CGExpr.cpp               | 79 ++++++++++++++++++++++
 clang/lib/CodeGen/CodeGenFunction.h        |  5 ++
 clang/lib/Driver/ToolChains/Clang.cpp      |  3 +
 8 files changed, 121 insertions(+)

diff --git a/clang/include/clang/Basic/DebugOptions.def b/clang/include/clang/Basic/DebugOptions.def
index 7cd3edf08a17ea..6dd09f46842077 100644
--- a/clang/include/clang/Basic/DebugOptions.def
+++ b/clang/include/clang/Basic/DebugOptions.def
@@ -129,6 +129,10 @@ DEBUGOPT(CodeViewCommandLine, 1, 0)
 /// Whether emit extra debug info for sample pgo profile collection.
 DEBUGOPT(DebugInfoForProfiling, 1, 0)
 
+/// Whether to generate pseudo variables and their debug info for intermediate
+/// pointer accesses.
+DEBUGOPT(DebugInfoForPointerType, 1, 0)
+
 /// Whether to emit .debug_gnu_pubnames section instead of .debug_pubnames.
 DEBUGOPT(DebugNameTable, 2, 0)
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7f4fa33748faca..96b22d3f7640dd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1675,6 +1675,10 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
           "Emit extra debug info to make sample profile more accurate">,
   NegFlag<SetFalse>>;
+def fdebug_info_for_pointer_type : Flag<["-"], "fdebug-info-for-pointer-type">,
+  Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Generate pseudo variables and their debug info for intermediate pointer accesses">,
+  MarshallingInfoFlag<CodeGenOpts<"DebugInfoForPointerType">>;
 def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
     Group<f_Group>, Visibility<[ClangOption, CLOption]>,
     HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 0f3f684d61dc94..6ce40da22dc97d 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5636,6 +5636,22 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
   Var->addDebugInfo(GVE);
 }
 
+void CGDebugInfo::EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
+                                     SourceLocation Loc) {
+  llvm::DIFile *Unit = getOrCreateFile(Loc);
+  unsigned Line = getLineNumber(Loc);
+  unsigned Column = getColumnNumber(Loc);
+  llvm::DILocalVariable *D = DBuilder.createAutoVariable(
+      LexicalBlockStack.back(), Alloca->getName(), Unit, Line,
+      getOrCreateType(Ty, Unit));
+  llvm::DILocation *DIL =
+      llvm::DILocation::get(CGM.getLLVMContext(), Line, Column,
+                            LexicalBlockStack.back(), CurInlinedAt);
+  // Declare Alloca as the storage of D, using an empty DIExpression.
+  DBuilder.insertDeclare(Alloca, D, DBuilder.createExpression(), DIL,
+                         Alloca->getParent());
+}
+
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
                                   const GlobalDecl GD) {
 
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 7b60e94555d060..a2c484f50b2bc5 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -529,6 +529,12 @@ class CGDebugInfo {
   /// Emit information about an external variable.
   void EmitExternalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl);
 
+  /// Emit debug information for a pseudo variable assigned to the value of an
+  /// intermediate expression, so that a performance counter can track the usage
+  /// of a specific expression of interest.
+  void EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
+                          SourceLocation Loc);
+
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
 
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..5f7b2529179003 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -793,6 +793,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
   Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
   if (!lifetime) {
     llvm::Value *value = EmitScalarExpr(init);
+    if (CGM.getCodeGenOpts().getDebugInfo() >
+            llvm::codegenoptions::DebugLineTablesOnly &&
+        CGM.getCodeGenOpts().DebugInfoForPointerType)
+      value = UnemitPseudoVariable(value);
     if (capturedByInit)
       drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
     EmitNullabilityCheck(lvalue, value, init->getExprLoc());
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index c5f6b6d3a99f0b..b979c0830c5b34 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -951,6 +951,58 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
   return nullptr;
 }
 
+/// Undo a pseudo variable emitted by EmitLoadOfScalar. For a value %1, that
+/// code emits these instructions in sequence and returns %2:
+///   %pseudo = alloca Ty
+///   call void @llvm.dbg.declare(metadata ptr %pseudo, metadata, metadata)
+///   store Ty %1, ptr %pseudo
+///   %2 = load Ty, ptr %pseudo
+/// If V is such a %2, erase the sequence and return %1; otherwise return V.
+llvm::Value *CodeGenFunction::UnemitPseudoVariable(llvm::Value *V) {
+  if (!getDebugInfo())
+    return V;
+
+  if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(V)) {
+    llvm::Value *PseudoVar = Load->getPointerOperand();
+    if (llvm::StoreInst *Store =
+            dyn_cast_or_null<llvm::StoreInst>(Load->getPrevNode())) {
+      if (Store->getPointerOperand() != PseudoVar)
+        return V;
+      llvm::Value *OriginalValue = Store->getValueOperand();
+      if (llvm::CallInst *DbgCall =
+              dyn_cast_or_null<llvm::CallInst>(Store->getPrevNode())) {
+        if (DbgCall->getCalledFunction() !=
+                llvm::Intrinsic::getDeclaration(&CGM.getModule(),
+                                                llvm::Intrinsic::dbg_declare) ||
+            DbgCall->getNumOperands() != 4)
+          return V;
+        for (int i = 0; i < 3; i++) {
+          if (!isa<llvm::MetadataAsValue>(DbgCall->getArgOperand(i)))
+            return V;
+        }
+        if (llvm::MetadataAsValue *Metadata =
+                dyn_cast<llvm::MetadataAsValue>(DbgCall->getOperand(0))) {
+          if (llvm::ValueAsMetadata *Value =
+                  dyn_cast<llvm::ValueAsMetadata>(Metadata->getMetadata())) {
+            if (Value->getValue() != PseudoVar)
+              return V;
+            if (llvm::AllocaInst *Alloca =
+                    dyn_cast_or_null<llvm::AllocaInst>(DbgCall->getPrevNode())) {
+              V->replaceAllUsesWith(OriginalValue);
+              Load->eraseFromParent();
+              Store->eraseFromParent();
+              DbgCall->eraseFromParent();
+              Alloca->eraseFromParent();
+              return OriginalValue;
+            }
+          }
+        }
+      }
+    }
+  }
+  return V;
+}
+
 namespace {
 
 /// \p StructAccessBase returns the base \p Expr of a field access. It returns
@@ -2015,6 +2067,29 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
                         llvm::MDNode::get(getLLVMContext(), std::nullopt));
     }
 
+  // If -g2 or above and -fdebug-info-for-pointer-type are enabled, emit
+  // additional debug info for loads in an intermediate expression, which allows
+  // a performance counter to deduce the type of the value being loaded, even if
+  // it does not correspond to a variable in the source code.
+  // Since no variable corresponds to an intermediate expression, we
+  // create a pseudo variable for it and emit its debug info, as if the
+  // expression were written in SSA form.
+  if (CGM.getCodeGenOpts().getDebugInfo() > llvm::codegenoptions::DebugLineTablesOnly &&
+      CGM.getCodeGenOpts().DebugInfoForPointerType) {
+    if (CGDebugInfo *DI = getDebugInfo())
+      // We only generate this debug info if loading from GEP, not from other
+      // cases such as loading a function argument.
+      if (isa<llvm::GetElementPtrInst>(Load->getOperand(0))) {
+        const llvm::DebugLoc &DebugLoc = Load->getDebugLoc();
+        llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(
+            Load->getType(), nullptr, Twine("pseudo_").concat(Twine(DebugLoc.getLine())).concat("_").concat(Twine(DebugLoc.getCol())));
+        DI->EmitPseudoVariable(PseudoVar, Ty, Loc);
+        Address PseudoVarAddr(PseudoVar, Load->getType(), Addr.getAlignment());
+        Builder.CreateStore(Load, PseudoVarAddr);
+        Load = Builder.CreateLoad(PseudoVarAddr);
+      }
+  }
+
   return EmitFromMemory(Load, Ty);
 }
 
@@ -5569,6 +5644,10 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) {
     }
 
     RValue RV = EmitAnyExpr(E->getRHS());
+    if (CGM.getCodeGenOpts().getDebugInfo() > llvm::codegenoptions::DebugLineTablesOnly &&
+        CGM.getCodeGenOpts().DebugInfoForPointerType)
+      if (isa<DeclRefExpr>(E->getLHS()) && RV.isScalar())
+        RV = RValue::get(UnemitPseudoVariable(RV.getScalarVal()));
     LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store);
     if (RV.isScalar())
       EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc());
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 143ad64e8816b1..36a572ace2ef64 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3104,6 +3104,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Get the record field index as represented in debug info.
   unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex);
 
+  /// When the result of EmitLoadOfScalar is immediately assigned to a declared
+  /// variable, the pseudo variable emitted for it (when the flag
+  /// -fdebug-info-for-pointer-type is specified) should be undone since there
+  /// is already a debug value emitted for the declared variable.
+  llvm::Value *UnemitPseudoVariable(llvm::Value *V);
 
   //===--------------------------------------------------------------------===//
   //                            Declaration Emission
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index bcba7cbbdb58c2..7882c4f1225f1f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4256,6 +4256,9 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T,
   // decision should be made in the driver as well though.
   llvm::DebuggerKind DebuggerTuning = TC.getDefaultDebuggerTuning();
 
+  if (Args.hasArg(options::OPT_fdebug_info_for_pointer_type))
+    CmdArgs.push_back("-fdebug-info-for-pointer-type");
+
   bool SplitDWARFInlining =
       Args.hasFlag(options::OPT_fsplit_dwarf_inlining,
                    options::OPT_fno_split_dwarf_inlining, false);



More information about the cfe-commits mailing list