[clang] [llvm] Add option to generate additional debug info for expression dereferencing pointer to pointers. (PR #81545)

via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 14 20:46:51 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-codegen

Author: William Junda Huang (huangjd)

<details>
<summary>Changes</summary>

An intermediate expression that dereferences a pointer to a pointer does not correspond to a variable in the source code, and thus does not have a debug location. When the user collects perf data on the program and the intermediate memory load instruction is sampled, the sample cannot be attributed to any variable or class member, which causes the sampling results to be under-counted.
This patch adds an option `-fdebug-info-for-pointer-type` to generate a pseudo variable and its debug info for each intermediate expression with pointer dereferencing, so that perf data collected on the instruction of that expression can be attributed to the correct class member.

This is a prototype so comments are needed.



---
Full diff: https://github.com/llvm/llvm-project/pull/81545.diff


8 Files Affected:

- (modified) clang/include/clang/Basic/DebugOptions.def (+4) 
- (modified) clang/include/clang/Driver/Options.td (+4) 
- (modified) clang/lib/CodeGen/CGDebugInfo.cpp (+65) 
- (modified) clang/lib/CodeGen/CGDebugInfo.h (+6) 
- (modified) clang/lib/CodeGen/CGExprScalar.cpp (+20-1) 
- (modified) clang/lib/Driver/ToolChains/Clang.cpp (+3) 
- (added) clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp (+120) 
- (modified) llvm/include/llvm/IR/DIBuilder.h (+2) 


``````````diff
diff --git a/clang/include/clang/Basic/DebugOptions.def b/clang/include/clang/Basic/DebugOptions.def
index 7cd3edf08a17ea..6dd09f46842077 100644
--- a/clang/include/clang/Basic/DebugOptions.def
+++ b/clang/include/clang/Basic/DebugOptions.def
@@ -129,6 +129,10 @@ DEBUGOPT(CodeViewCommandLine, 1, 0)
 /// Whether emit extra debug info for sample pgo profile collection.
 DEBUGOPT(DebugInfoForProfiling, 1, 0)
 
+/// Whether to generate pseudo variables and their debug info for intermediate
+/// pointer accesses.
+DEBUGOPT(DebugInfoForPointerType, 1, 0)
+
 /// Whether to emit .debug_gnu_pubnames section instead of .debug_pubnames.
 DEBUGOPT(DebugNameTable, 2, 0)
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7f4fa33748faca..96b22d3f7640dd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1675,6 +1675,10 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
           "Emit extra debug info to make sample profile more accurate">,
   NegFlag<SetFalse>>;
+def fdebug_info_for_pointer_type : Flag<["-"], "fdebug-info-for-pointer-type">,
+  Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Generate pseudo variables and their debug info for intermediate pointer accesses">,
+  MarshallingInfoFlag<CodeGenOpts<"DebugInfoForPointerType">>;
 def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
     Group<f_Group>, Visibility<[ClangOption, CLOption]>,
     HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 0f3f684d61dc94..f28e83a3b99ec2 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5636,6 +5636,71 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
   Var->addDebugInfo(GVE);
 }
 
+void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
+                                     llvm::Instruction *Value, QualType Ty) {
+  // Only when -g2 or above is specified, debug info for variables will be
+  // generated.
+  if (CGM.getCodeGenOpts().getDebugInfo() <=
+      llvm::codegenoptions::DebugLineTablesOnly)
+    return;
+
+  llvm::DIFile *Unit = Builder.getCurrentDebugLocation()->getFile();
+  llvm::DIType *Type = getOrCreateType(Ty, Unit);
+
+  // Check if Value is already a declared variable and has debug info, in this
+  // case we have nothing to do. Clang emits declared variable as alloca, and
+  // it is loaded upon use, so we identify such pattern here.
+  if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Value)) {
+    llvm::Value *Var = Load->getPointerOperand();
+    if (llvm::Metadata *MDValue = llvm::ValueAsMetadata::getIfExists(Var)) {
+      if (llvm::Value *DbgValue = llvm::MetadataAsValue::getIfExists(
+              CGM.getLLVMContext(), MDValue)) {
+        for (llvm::User *U : DbgValue->users()) {
+          if (llvm::CallInst *DbgDeclare = dyn_cast<llvm::CallInst>(U)) {
+            if (DbgDeclare->getCalledFunction() == DBuilder.GetDeclareFn() &&
+                DbgDeclare->getArgOperand(0) == DbgValue) {
+              // There can be implicit type cast applied on a variable if it is
+              // an opaque ptr, in this case its debug info may not match the
+              // actual type of object being used as in the next instruction, so
+              // we will need to emit a pseudo variable for type-casted value.
+              llvm::DILocalVariable *MDNode = dyn_cast<llvm::DILocalVariable>(
+                  dyn_cast<llvm::MetadataAsValue>(DbgDeclare->getOperand(1))
+                      ->getMetadata());
+              if (MDNode->getType() == Type)
+                return;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Insert a sequence of instructions to materialize Value on the stack.
+  auto SaveInsertionPoint = Builder.saveIP();
+  Builder.SetInsertPoint(++(Value->getIterator()));
+  Builder.SetCurrentDebugLocation(Value->getDebugLoc());
+  llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(Value->getType());
+  Address PseudoVarAddr(PseudoVar, Value->getType(),
+                        CharUnits::fromQuantity(PseudoVar->getAlign()));
+  llvm::LoadInst *Load = Builder.CreateLoad(PseudoVarAddr);
+  Value->replaceAllUsesWith(Load);
+  Builder.SetInsertPoint(Load);
+  Builder.CreateStore(Value, PseudoVarAddr);
+
+  // Emit debug info for materialized Value.
+  unsigned Line = Builder.getCurrentDebugLocation().getLine();
+  unsigned Column = Builder.getCurrentDebugLocation().getCol();
+  llvm::DILocalVariable *D = DBuilder.createAutoVariable(
+      LexicalBlockStack.back(), "pseudo_var", Unit, Line, Type);
+  llvm::DILocation *DIL =
+      llvm::DILocation::get(CGM.getLLVMContext(), Line, Column,
+                            LexicalBlockStack.back(), CurInlinedAt);
+  SmallVector<uint64_t> Expr;
+  DBuilder.insertDeclare(PseudoVar, D, DBuilder.createExpression(Expr), DIL,
+                         Load);
+  Builder.restoreIP(SaveInsertionPoint);
+}
+
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
                                   const GlobalDecl GD) {
 
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 7b60e94555d060..2756ea1e7b4f1e 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -529,6 +529,12 @@ class CGDebugInfo {
   /// Emit information about an external variable.
   void EmitExternalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl);
 
+  /// Emit a pseudo variable and debug info for an intermediate value if it does
+  /// not correspond to a variable in the source code, so that a profiler can
+  /// track more accurate usage of certain instructions of interest.
+  void EmitPseudoVariable(CGBuilderTy &Builder, llvm::Instruction *Value,
+                          QualType Ty);
+
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
 
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 181b15e9c7d0a7..cad79e77052842 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1787,7 +1787,26 @@ Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
     }
   }
 
-  return EmitLoadOfLValue(E);
+  llvm::Value *Result = EmitLoadOfLValue(E);
+
+  // If -fdebug-info-for-pointer-type is specified, emit a pseudo variable and
+  // its debug info for the pointer, even if there is no variable associated
+  // with the pointer's expression.
+  if (CGF.CGM.getCodeGenOpts().DebugInfoForPointerType && CGF.getDebugInfo()) {
+    if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Result)) {
+      if (llvm::GetElementPtrInst *GEP =
+              dyn_cast<llvm::GetElementPtrInst>(Load->getPointerOperand())) {
+        if (llvm::Instruction *Pointer =
+                dyn_cast<llvm::Instruction>(GEP->getPointerOperand())) {
+          QualType Ty = E->getBase()->getType();
+          if (!E->isArrow())
+            Ty = CGF.getContext().getPointerType(Ty);
+          CGF.getDebugInfo()->EmitPseudoVariable(Builder, Pointer, Ty);
+        }
+      }
+    }
+  }
+  return Result;
 }
 
 Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index bcba7cbbdb58c2..7882c4f1225f1f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4256,6 +4256,9 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T,
   // decision should be made in the driver as well though.
   llvm::DebuggerKind DebuggerTuning = TC.getDefaultDebuggerTuning();
 
+  if (Args.hasArg(options::OPT_fdebug_info_for_pointer_type))
+    CmdArgs.push_back("-fdebug-info-for-pointer-type");
+
   bool SplitDWARFInlining =
       Args.hasFlag(options::OPT_fsplit_dwarf_inlining,
                    options::OPT_fno_split_dwarf_inlining, false);
diff --git a/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
new file mode 100644
index 00000000000000..6758ef445d463a
--- /dev/null
+++ b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
@@ -0,0 +1,120 @@
+// Test debug info for intermediate value of a chained pointer dereferencing
+// expression when the flag -fdebug-info-for-pointer-type is enabled.
+// RUN: %clang_cc1 %s -fdebug-info-for-pointer-type -debug-info-kind=constructor -S -emit-llvm -o - | FileCheck %s
+
+class A {
+public:
+  int i;
+  char c;
+  void *p;
+  int arr[3];
+};
+
+class B {
+public:
+  A* a;
+};
+
+class C {
+public:
+  B* b;
+  A* a;
+  A arr[10];
+};
+
+// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func1{{.*}}(
+// CHECK:         [[A_ADDR:%.*]] = getelementptr inbounds %class.B, ptr {{%.*}}, i32 0, i32 0, !dbg [[DBG1:![0-9]+]]
+// CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[A_ADDR]], align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr, align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    store ptr [[A]], ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META1:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0,
+int func1(B *b) {
+  return b->a->i;
+}
+
+// Should generate a pseudo variable when pointer is type-casted.
+// CHECK-LABEL: define dso_local noundef ptr @{{.*}}func2{{.*}}(
+// CHECK:         call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META2:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[B:%.*]] = load ptr, ptr [[B_ADDR]],
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[B]], ptr [[PSEUDO1]],
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META3:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.B, ptr [[TMP1]], i32 0,
+A* func2(void *b) {
+  return ((B*)b)->a;
+}
+
+// Should not generate pseudo variable in this case.
+// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func3{{.*}}(
+// CHECK:    call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META4:![0-9]+]], metadata !DIExpression())
+// CHECK:    call void @llvm.dbg.declare(metadata ptr [[LOCAL1:%.*]], metadata [[META5:![0-9]+]], metadata !DIExpression())
+// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr
+int func3(B *b) {
+  A *local1 = b->a;
+  return local1->i;
+}
+
+// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func4{{.*}}(
+// CHECK:         [[A_ADDR:%.*]] = getelementptr inbounds %class.C, ptr {{%.*}}, i32 0, i32 1
+// CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[A_ADDR]],
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[A]], ptr [[PSEUDO1]],
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META6:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0,
+// CHECK:         [[CALL:%.*]] = call noundef ptr @{{.*}}foo{{.*}}(
+// CHECK-NEXT:    [[PSEUDO2:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[CALL]], ptr [[PSEUDO2]]
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO2]], metadata [[META6]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PSEUDO2]]
+// CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds %class.A, ptr [[TMP2]], i32 0, i32 1
+char func4(C *c) {
+  extern A* foo(int x);
+  return foo(c->a->i)->c;
+}
+
+// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func5{{.*}}(
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META7:![0-9]+]], metadata !DIExpression())
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META8:![0-9]+]], metadata !DIExpression())
+// CHECK:         [[A_ADDR:%.*]] = getelementptr inbounds %class.A, ptr {{%.*}}, i64 {{%.*}},
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[A_ADDR]], ptr [[PSEUDO1]],
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META9:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 1,
+char func5(void *arr, int n) {
+  return ((A*)arr)[n].c;
+}
+
+// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func6{{.*}}(
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META10:![0-9]+]], metadata !DIExpression())
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META11:![0-9]+]], metadata !DIExpression())
+int func6(B &b) {
+  return reinterpret_cast<A&>(b).i;
+}
+
+// CHECK-DAG: [[META_A:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "A",
+// CHECK-DAG: [[META_AP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_A]],
+// CHECK-DAG: [[META_B:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "B",
+// CHECK-DAG: [[META_BP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_B]],
+// CHECK-DAG: [[META_C:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "C",
+// CHECK-DAG: [[META_CP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_C]],
+// CHECK-DAG: [[META_VP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null,
+// CHECK-DAG: [[META_I32:![0-9]+]] = !DIBasicType(name: "int", size: 32,
+// CHECK-DAG: [[META_BR:![0-9]+]] = !DIDerivedType(tag: DW_TAG_reference_type, baseType: [[META_B]],
+
+// CHECK-DAG: [[DBG1]] = !DILocation(line: 34, column: 13,
+// CHECK-DAG: [[META1]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 34, type: [[META_AP]])
+// CHECK-DAG: [[META2]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 46, type: [[META_VP]])
+// CHECK-DAG: [[META3]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 47, type: [[META_BP]])
+// CHECK-DAG: [[META4]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 55, type: [[META_BP]])
+// CHECK-DAG: [[META5]] = !DILocalVariable(name: "local1", scope: {{.*}}, file: {{.*}}, line: 56, type: [[META_AP]])
+// CHECK-DAG: [[META6]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 76, type: [[META_AP]])
+// CHECK-DAG: [[META7]] = !DILocalVariable(name: "arr", arg: 1, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_VP]])
+// CHECK-DAG: [[META8]] = !DILocalVariable(name: "n", arg: 2, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_I32]])
+// CHECK-DAG: [[META9]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 89, type: [[META_AP]])
+// CHECK-DAG: [[META10]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 95, type: [[META_BR]])
+// CHECK-DAG: [[META11]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 96, type: [[META_AP]])
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index edec161b397155..e420f73e152907 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -1024,6 +1024,8 @@ namespace llvm {
       N->replaceAllUsesWith(Replacement);
       return Replacement;
     }
+
+    Function *GetDeclareFn() { return DeclareFn; }
   };
 
   // Create wrappers for C Binding types (see CBindingWrapping.h).

``````````

</details>


https://github.com/llvm/llvm-project/pull/81545


More information about the cfe-commits mailing list