[clang] aeccfee - Add option to generate additional debug info for expression dereferencing pointer to pointers. (#81545)

via cfe-commits cfe-commits at lists.llvm.org
Wed May 29 15:04:15 PDT 2024


Author: William Junda Huang
Date: 2024-05-29T18:04:11-04:00
New Revision: aeccfee348c717165541d8d895b9b0cdfe31415c

URL: https://github.com/llvm/llvm-project/commit/aeccfee348c717165541d8d895b9b0cdfe31415c
DIFF: https://github.com/llvm/llvm-project/commit/aeccfee348c717165541d8d895b9b0cdfe31415c.diff

LOG: Add option to generate additional debug info for expression dereferencing pointer to pointers. (#81545)

Such expression does not correspond to a variable in the source code
thus does not have a debug location. When the user collects perf data on
the program, if the intermediate memory load instruction is sampled, it
could not be attributed to any variable/class member, which causes the
sampling results to be under-counted.
This patch adds an option `-fdebug_info_for_pointer_type` to generate a
psuedo variable and its debug info for intermediate expression with
pointer dereferencing, so that perf data collected on the instruction of
that expression can be attributed to the correct class member.

This is a prototype so comments are needed.

Added: 
    clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp

Modified: 
    clang/lib/CodeGen/CGDebugInfo.cpp
    clang/lib/CodeGen/CGDebugInfo.h
    clang/lib/CodeGen/CGExprScalar.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 1713f7065e7a2..5f6f911c7a6d6 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5737,6 +5737,90 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
   Var->addDebugInfo(GVE);
 }
 
+void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
+                                     llvm::Instruction *Value, QualType Ty) {
+  // Only when -g2 or above is specified, debug info for variables will be
+  // generated.
+  if (CGM.getCodeGenOpts().getDebugInfo() <=
+      llvm::codegenoptions::DebugLineTablesOnly)
+    return;
+
+  llvm::DebugLoc SaveDebugLoc = Builder.getCurrentDebugLocation();
+  if (!SaveDebugLoc.get())
+    return;
+
+  llvm::DIFile *Unit = SaveDebugLoc->getFile();
+  llvm::DIType *Type = getOrCreateType(Ty, Unit);
+
+  // Check if Value is already a declared variable and has debug info, in this
+  // case we have nothing to do. Clang emits declared variable as alloca, and
+  // it is loaded upon use, so we identify such pattern here.
+  if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Value)) {
+    llvm::Value *Var = Load->getPointerOperand();
+    if (llvm::Metadata *MDValue = llvm::ValueAsMetadata::getIfExists(Var)) {
+      if (llvm::Value *DbgValue = llvm::MetadataAsValue::getIfExists(
+              CGM.getLLVMContext(), MDValue)) {
+        for (llvm::User *U : DbgValue->users()) {
+          if (llvm::CallInst *DbgDeclare = dyn_cast<llvm::CallInst>(U)) {
+            if (DbgDeclare->getCalledFunction()->getIntrinsicID() ==
+                    llvm::Intrinsic::dbg_declare &&
+                DbgDeclare->getArgOperand(0) == DbgValue) {
+              // There can be implicit type cast applied on a variable if it is
+              // an opaque ptr, in this case its debug info may not match the
+              // actual type of object being used as in the next instruction, so
+              // we will need to emit a pseudo variable for type-casted value.
+              llvm::DILocalVariable *MDNode = cast<llvm::DILocalVariable>(
+                  cast<llvm::MetadataAsValue>(DbgDeclare->getOperand(1))
+                      ->getMetadata());
+              if (MDNode->getType() == Type)
+                return;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Find the correct location to insert a sequence of instructions to
+  // materialize Value on the stack.
+  auto SaveInsertionPoint = Builder.saveIP();
+  if (llvm::InvokeInst *Invoke = dyn_cast<llvm::InvokeInst>(Value))
+    Builder.SetInsertPoint(Invoke->getNormalDest()->begin());
+  else if (llvm::Instruction *Next = Value->getIterator()->getNextNode())
+    Builder.SetInsertPoint(Next);
+  else
+    Builder.SetInsertPoint(Value->getParent());
+  llvm::DebugLoc DL = Value->getDebugLoc();
+  if (DL.get())
+    Builder.SetCurrentDebugLocation(DL);
+  else if (!Builder.getCurrentDebugLocation().get())
+    Builder.SetCurrentDebugLocation(SaveDebugLoc);
+
+  llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(Value->getType());
+  Address PseudoVarAddr(PseudoVar, Value->getType(),
+                        CharUnits::fromQuantity(PseudoVar->getAlign()));
+  llvm::LoadInst *Load = Builder.CreateLoad(PseudoVarAddr);
+  Value->replaceAllUsesWith(Load);
+  Builder.SetInsertPoint(Load);
+  Builder.CreateStore(Value, PseudoVarAddr);
+
+  // Emit debug info for materialized Value.
+  unsigned Line = Builder.getCurrentDebugLocation().getLine();
+  unsigned Column = Builder.getCurrentDebugLocation().getCol();
+  llvm::DILocalVariable *D = DBuilder.createAutoVariable(
+      LexicalBlockStack.back(), "", nullptr, 0, Type, false,
+      llvm::DINode::FlagArtificial);
+  llvm::DILocation *DIL =
+      llvm::DILocation::get(CGM.getLLVMContext(), Line, Column,
+                            LexicalBlockStack.back(), CurInlinedAt);
+  SmallVector<uint64_t> Expr;
+  DBuilder.insertDeclare(PseudoVar, D, DBuilder.createExpression(Expr), DIL,
+                         Load);
+
+  Builder.restoreIP(SaveInsertionPoint);
+  Builder.SetCurrentDebugLocation(SaveDebugLoc);
+}
+
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
                                   const GlobalDecl GD) {
 

diff  --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index d6db4d711366a..614316f3fc7fd 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -529,6 +529,12 @@ class CGDebugInfo {
   /// Emit information about an external variable.
   void EmitExternalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl);
 
+  /// Emit a pseudo variable and debug info for an intermediate value if it does
+  /// not correspond to a variable in the source code, so that a profiler can
+  /// track more accurate usage of certain instructions of interest.
+  void EmitPseudoVariable(CGBuilderTy &Builder, llvm::Instruction *Value,
+                          QualType Ty);
+
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
 

diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 1b144c178ce96..58f0a3113b4f8 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1937,7 +1937,26 @@ Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
     }
   }
 
-  return EmitLoadOfLValue(E);
+  llvm::Value *Result = EmitLoadOfLValue(E);
+
+  // If -fdebug-info-for-profiling is specified, emit a pseudo variable and its
+  // debug info for the pointer, even if there is no variable associated with
+  // the pointer's expression.
+  if (CGF.CGM.getCodeGenOpts().DebugInfoForProfiling && CGF.getDebugInfo()) {
+    if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Result)) {
+      if (llvm::GetElementPtrInst *GEP =
+              dyn_cast<llvm::GetElementPtrInst>(Load->getPointerOperand())) {
+        if (llvm::Instruction *Pointer =
+                dyn_cast<llvm::Instruction>(GEP->getPointerOperand())) {
+          QualType Ty = E->getBase()->getType();
+          if (!E->isArrow())
+            Ty = CGF.getContext().getPointerType(Ty);
+          CGF.getDebugInfo()->EmitPseudoVariable(Builder, Pointer, Ty);
+        }
+      }
+    }
+  }
+  return Result;
 }
 
 Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {

diff  --git a/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
new file mode 100644
index 0000000000000..8e465a1febf7c
--- /dev/null
+++ b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
@@ -0,0 +1,120 @@
+// Test debug info for intermediate value of a chained pointer deferencing
+// expression when the flag -fdebug-info-for-pointer-type is enabled.
+// RUN: %clang_cc1 %s -fdebug-info-for-profiling -debug-info-kind=constructor -S -emit-llvm -o - | FileCheck %s
+
+class A {
+public:
+  int i;
+  char c;
+  void *p;
+  int arr[3];
+};
+
+class B {
+public:
+  A* a;
+};
+
+class C {
+public:
+  B* b;
+  A* a;
+  A arr[10];
+};
+
+// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func1{{.*}}(
+// CHECK:         [[A_ADDR:%.*]] = getelementptr inbounds %class.B, ptr {{%.*}}, i32 0, i32 0, !dbg [[DBG1:![0-9]+]]
+// CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[A_ADDR]], align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr, align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    store ptr [[A]], ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META1:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0,
+int func1(B *b) {
+  return b->a->i;
+}
+
+// Should generate a pseudo variable when pointer is type-casted.
+// CHECK-LABEL: define dso_local noundef ptr @{{.*}}func2{{.*}}(
+// CHECK:         call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META2:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[B:%.*]] = load ptr, ptr [[B_ADDR]],
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[B]], ptr [[PSEUDO1]],
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META3:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.B, ptr [[TMP1]], i32 0,
+A* func2(void *b) {
+  return ((B*)b)->a;
+}
+
+// Should not generate pseudo variable in this case.
+// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func3{{.*}}(
+// CHECK:    call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META4:![0-9]+]], metadata !DIExpression())
+// CHECK:    call void @llvm.dbg.declare(metadata ptr [[LOCAL1:%.*]], metadata [[META5:![0-9]+]], metadata !DIExpression())
+// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr
+int func3(B *b) {
+  A *local1 = b->a;
+  return local1->i;
+}
+
+// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func4{{.*}}(
+// CHECK:         [[A_ADDR:%.*]] = getelementptr inbounds %class.C, ptr {{%.*}}, i32 0, i32 1
+// CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[A_ADDR]],
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[A]], ptr [[PSEUDO1]],
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META6:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0,
+// CHECK:         [[CALL:%.*]] = call noundef ptr @{{.*}}foo{{.*}}(
+// CHECK-NEXT:    [[PSEUDO2:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[CALL]], ptr [[PSEUDO2]]
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO2]], metadata [[META6]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PSEUDO2]]
+// CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds %class.A, ptr [[TMP2]], i32 0, i32 1
+char func4(C *c) {
+  extern A* foo(int x);
+  return foo(c->a->i)->c;
+}
+
+// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func5{{.*}}(
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META7:![0-9]+]], metadata !DIExpression())
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META8:![0-9]+]], metadata !DIExpression())
+// CHECK:         [[A_ADDR:%.*]] = getelementptr inbounds %class.A, ptr {{%.*}}, i64 {{%.*}},
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[A_ADDR]], ptr [[PSEUDO1]],
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META9:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 1,
+char func5(void *arr, int n) {
+  return ((A*)arr)[n].c;
+}
+
+// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func6{{.*}}(
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META10:![0-9]+]], metadata !DIExpression())
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META11:![0-9]+]], metadata !DIExpression())
+int func6(B &b) {
+  return reinterpret_cast<A&>(b).i;
+}
+
+// CHECK-DAG: [[META_A:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "A",
+// CHECK-DAG: [[META_AP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_A]],
+// CHECK-DAG: [[META_B:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "B",
+// CHECK-DAG: [[META_BP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_B]],
+// CHECK-DAG: [[META_C:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "C",
+// CHECK-DAG: [[META_CP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_C]],
+// CHECK-DAG: [[META_VP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null,
+// CHECK-DAG: [[META_I32:![0-9]+]] = !DIBasicType(name: "int", size: 32,
+// CHECK-DAG: [[META_BR:![0-9]+]] = !DIDerivedType(tag: DW_TAG_reference_type, baseType: [[META_B]],
+
+// CHECK-DAG: [[DBG1]] = !DILocation(line: 34, column: 13,
+// CHECK-DAG: [[META1]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
+// CHECK-DAG: [[META2]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 46, type: [[META_VP]])
+// CHECK-DAG: [[META3]] = !DILocalVariable(scope: {{.*}}, type: [[META_BP]], flags: DIFlagArtificial)
+// CHECK-DAG: [[META4]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 55, type: [[META_BP]])
+// CHECK-DAG: [[META5]] = !DILocalVariable(name: "local1", scope: {{.*}}, file: {{.*}}, line: 56, type: [[META_AP]])
+// CHECK-DAG: [[META6]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
+// CHECK-DAG: [[META7]] = !DILocalVariable(name: "arr", arg: 1, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_VP]])
+// CHECK-DAG: [[META8]] = !DILocalVariable(name: "n", arg: 2, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_I32]])
+// CHECK-DAG: [[META9]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)
+// CHECK-DAG: [[META10]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 95, type: [[META_BR]])
+// CHECK-DAG: [[META11]] = !DILocalVariable(scope: {{.*}}, type: [[META_AP]], flags: DIFlagArtificial)


        


More information about the cfe-commits mailing list